This script includes all steps in the “main pipeline” part of my thesis project: differential expression analysis of the reference airway current vs. never smoker dataset (A1, GSE63127), differential expression analysis of the TCGA-LUAD lung adenocarcinoma expression and methylation datasets, and differential analysis of the reference “persistent” airway current vs. former vs. never smoker dataset (A2, GSE7895). All normalization, quality control, and filtering steps are included.

Notes: - I will opt to keep all genes with FDR < 0.05 until the step with filtering and dataset comparisons.

Loading libraries

library(EnhancedVolcano, verbose = FALSE)
## Loading required package: ggplot2
## Loading required package: ggrepel
## Warning: package 'ggrepel' was built under R version 4.3.3
library(GEOquery, verbose = FALSE)
## Loading required package: Biobase
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which.max, which.min
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Setting options('download.file.method.GEOquery'='auto')
## Setting options('GEOquery.inmemory.gpl'=FALSE)
library(limma, verbose = FALSE)
## 
## Attaching package: 'limma'
## The following object is masked from 'package:BiocGenerics':
## 
##     plotMA
library(umap, verbose = FALSE)
library(dplyr, verbose = FALSE)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:Biobase':
## 
##     combine
## The following objects are masked from 'package:BiocGenerics':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

1. Differential expression analysis of reference airway current vs never smoker dataset (A1, GSE63127)

1.1 Loading dataset

# Version info: R 4.2.2, Biobase 2.58.0, GEOquery 2.66.0, limma 3.54.0
################################################################
#   Differential expression analysis with limma


# Load series and platform data from GEO (date: 2024/10/15)
gset <- getGEO("GSE63127", GSEMatrix = TRUE, AnnotGPL = TRUE)
## Found 1 file(s)
## GSE63127_series_matrix.txt.gz
# getGEO() returns a list; if it holds more than one entry, keep the GPL570 one
if (length(gset) > 1) {
  idx <- grep("GPL570", attr(gset, "names"))
} else {
  idx <- 1
}
gset <- gset[[idx]]

# make proper column names to match toptable
fvarLabels(gset) <- make.names(fvarLabels(gset))

# Group membership for all samples: one character per sample, in series order.
# "0" = non-smoker, "1" = smoker (labels are attached further down),
# "X" = sample excluded from the analysis.
gsms <- paste0("X00100011111X00000000000X00000X000000000000X000X00",
               "XX00X0XXXXXXXXX1111111111111111111111X111X11111111",
               "XXXX1XXXXXXXXXXXXXXXXXXXXXXXX001000000010100111111",
               "01110111110011111001110011101011111001110101100011",
               "111111111111111111111111111111")
sml <- strsplit(gsms, split = "")[[1]]

# The membership string is purely positional, so it must describe exactly the
# samples that were downloaded; otherwise groups would be silently misassigned.
stopifnot(
  "gsms must contain one character per sample in gset" =
    length(sml) == ncol(gset)
)

# filter out excluded samples (marked as "X")
sel <- which(sml != "X")
sml <- sml[sel]
gset <- gset[, sel]

gset <- gset[complete.cases(exprs(gset)), ] # skip missing values
length(sml) # 182 samples
## [1] 182

1.2 Quality control checks and normalization

2024/11/07: I am going to try doing this without the quantile normalization to see whether it’s still okay

## Make histograms and boxplots to check whether the data are already log-transformed and whether they need normalization ##
hist(as.matrix(exprs(gset))) # values span ~0.07 to ~137,000, i.e. a long upper tail (right skew): needs log2 transform

boxplot(exprs(gset)) # scary-looking

max(exprs(gset)) # 136808
## [1] 136808
min(exprs(gset)) # 0.0657913
## [1] 0.0657913
# The data are on the raw intensity scale, so a log2 transform is needed.
# Quantile normalization was originally applied as well; it is currently skipped (see the 2024/11/07 note).

## log2 transform (quantile normalization currently disabled) ##
# The +1 offset keeps every transformed value non-negative.
exprs(gset) <- log2(exprs(gset)+1)
##exprs(gset) <- normalizeBetweenArrays(exprs(gset)) # quantile normalization: no longer doing this for now


hist(as.matrix(exprs(gset))) # much better

boxplot(exprs(gset)) # Look reasonable

min(exprs(gset)) 
## [1] 0.09192496
max(exprs(gset)) 
## [1] 17.0618

1.3 Checking and correcting batch effect / sources of variation

1.3.1: Download and clean up the phenotypic information table from the dataset

# Label the 0/1 membership codes (sorted levels "0", "1") with readable group
# names and store the resulting factor on the expression set.
groups <- make.names(c("non-smoker","smoker"))
gs <- factor(sml, labels = groups)
gset$group <- gs

# Sample-level (phenotypic) annotation for the samples kept above
phenotypic_data <- pData(gset)

# The raw table is messy: the same attribute appears under differently named
# columns depending on the sample. Inspect it first, then clean it up below.
head(phenotypic_data)
##                                   title geo_accession                status
## GSM190150 Small airways, non-smoker 029     GSM190150 Public on Dec 16 2008
## GSM190153 Small airways, non-smoker 036     GSM190153 Public on Jun 17 2008
## GSM254157     small airways, smoker 112     GSM254157 Public on Jun 17 2008
## GSM298223 small airways, non-smoker 050     GSM298223 Public on Jul 13 2009
## GSM298227 small airways, non-smoker 076     GSM298227 Public on Jul 13 2009
## GSM298228 small airways, non-smoker 080     GSM298228 Public on Jul 13 2009
##           submission_date last_update_date type channel_count
## GSM190150     May 17 2007      Aug 28 2018  RNA             1
## GSM190153     May 17 2007      Aug 28 2018  RNA             1
## GSM254157     Jan 03 2008      Aug 28 2018  RNA             1
## GSM298223     Jun 13 2008      Nov 12 2009  RNA             1
## GSM298227     Jun 13 2008      Aug 28 2018  RNA             1
## GSM298228     Jun 13 2008      Aug 28 2018  RNA             1
##                                                         source_name_ch1
## GSM190150 airway epithelial cells obtained by bronchoscopy and brushing
## GSM190153 airway epithelial cells obtained by bronchoscopy and brushing
## GSM254157 airway epithelial cells obtained by bronchoscopy and brushing
## GSM298223 airway epithelial cells obtained by bronchoscopy and brushing
## GSM298227 airway epithelial cells obtained by bronchoscopy and brushing
## GSM298228 airway epithelial cells obtained by bronchoscopy and brushing
##           organism_ch1 characteristics_ch1 characteristics_ch1.1
## GSM190150 Homo sapiens             age: 34                sex: M
## GSM190153 Homo sapiens             age: 45                sex: F
## GSM254157 Homo sapiens             age: 45                sex: M
## GSM298223 Homo sapiens             age: 38                sex: M
## GSM298227 Homo sapiens             age: 29                sex: M
## GSM298228 Homo sapiens             age: 39                sex: F
##            characteristics_ch1.2                 characteristics_ch1.3
## GSM190150    ethnic group: black            smoking status: non-smoker
## GSM190153 ethnic group: hispanic            smoking status: non-smoker
## GSM254157    ethnic group: white smoking status: smoker, 23 pack-years
## GSM298223 ethnic group: hispanic            smoking status: non-smoker
## GSM298227 ethnic group: hispanic            smoking status: non-smoker
## GSM298228    ethnic group: asian            smoking status: non-smoker
##           molecule_ch1
## GSM190150    total RNA
## GSM190153    total RNA
## GSM254157    total RNA
## GSM298223    total RNA
## GSM298227    total RNA
## GSM298228    total RNA
##                                                                                                      extract_protocol_ch1
## GSM190150 Trizol extraction and RNAeasy clean-up of total RNA was performed according to the manufacturer's instructions.
## GSM190153 Trizol extraction and RNAeasy clean-up of total RNA was performed according to the manufacturer's instructions.
## GSM254157 Trizol extraction and RNAeasy clean-up of total RNA was performed according to the manufacturer's instructions.
## GSM298223 Trizol extraction and RNAeasy clean-up of total RNA was performed according to the manufacturer's instructions.
## GSM298227 Trizol extraction and RNAeasy clean-up of total RNA was performed according to the manufacturer's instructions.
## GSM298228 Trizol extraction and RNAeasy clean-up of total RNA was performed according to the manufacturer's instructions.
##           label_ch1
## GSM190150    biotin
## GSM190153    biotin
## GSM254157    biotin
## GSM298223    biotin
## GSM298227    biotin
## GSM298228    biotin
##                                                                                                                                                                  label_protocol_ch1
## GSM190150   Biotinylated cRNA were prepared according to the standard Affymetrix protocol from 3 microg total RNA (Expression Analysis Technical Manual, 701022 Rev.2, Affymetrix).
## GSM190153   Biotinylated cRNA were prepared according to the standard Affymetrix protocol from 3 microg total RNA (Expression Analysis Technical Manual, 701022 Rev.2, Affymetrix).
## GSM254157 Biotinylated cRNA were prepared according to the standard Affymetrix protocol from 1-2 microg total RNA (Expression Analysis Technical Manual, 701022 Rev.2, Affymetrix).
## GSM298223   Biotinylated cRNA were prepared according to the standard Affymetrix protocol from 3 microg total RNA (Expression Analysis Technical Manual, 701022 Rev.2, Affymetrix).
## GSM298227 Biotinylated cRNA were prepared according to the standard Affymetrix protocol from 1-2 microg total RNA (Expression Analysis Technical Manual, 701022 Rev.2, Affymetrix).
## GSM298228 Biotinylated cRNA were prepared according to the standard Affymetrix protocol from 1-2 microg total RNA (Expression Analysis Technical Manual, 701022 Rev.2, Affymetrix).
##           taxid_ch1
## GSM190150      9606
## GSM190153      9606
## GSM254157      9606
## GSM298223      9606
## GSM298227      9606
## GSM298228      9606
##                                                                                                                                                                                  hyb_protocol
## GSM190150 Following fragmentation, 15 microg of cRNA were hybridized for 16 hr at 45C on GeneChip HG-U133 Plus 2.0. GeneChips were washed and stained in the Affymetrix Fluidics Station 450.
## GSM190153 Following fragmentation, 15 microg of cRNA were hybridized for 16 hr at 45C on GeneChip HG-U133 Plus 2.0. GeneChips were washed and stained in the Affymetrix Fluidics Station 450.
## GSM254157 Following fragmentation, 10 microg of cRNA were hybridized for 16 hr at 45C on GeneChip HG-U133 Plus 2.0. GeneChips were washed and stained in the Affymetrix Fluidics Station 450.
## GSM298223 Following fragmentation, 15 microg of cRNA were hybridized for 16 hr at 45C on GeneChip HG-U133 Plus 2.0. GeneChips were washed and stained in the Affymetrix Fluidics Station 450.
## GSM298227 Following fragmentation, 10 microg of cRNA were hybridized for 16 hr at 45C on GeneChip HG-U133 Plus 2.0. GeneChips were washed and stained in the Affymetrix Fluidics Station 450.
## GSM298228 Following fragmentation, 10 microg of cRNA were hybridized for 16 hr at 45C on GeneChip HG-U133 Plus 2.0. GeneChips were washed and stained in the Affymetrix Fluidics Station 450.
##                                                        scan_protocol
## GSM190150 GeneChips were scanned using the GeneChip Scanner 3000 7G.
## GSM190153 GeneChips were scanned using the GeneChip Scanner 3000 7G.
## GSM254157 GeneChips were scanned using the GeneChip Scanner 3000 7G.
## GSM298223 GeneChips were scanned using the GeneChip Scanner 3000 7G.
## GSM298227 GeneChips were scanned using the GeneChip Scanner 3000 7G.
## GSM298228 GeneChips were scanned using the GeneChip Scanner 3000 7G.
##                         description
## GSM190150 small airways, non-smoker
## GSM190153 small airways, non-smoker
## GSM254157 small airways, smoker 112
## GSM298223                      none
## GSM298227                      none
## GSM298228                      none
##                                                                                                                                                     data_processing
## GSM190150 The data were analyzed with Microarray Suite version 5.0 (MAS 5.0) using Affymetrix default analysis settings and global scaling as normalization method.
## GSM190153 The data were analyzed with Microarray Suite version 5.0 (MAS 5.0) using Affymetrix default analysis settings and global scaling as normalization method.
## GSM254157 The data were analyzed with Microarray Suite version 5.0 (MAS 5.0) using Affymetrix default analysis settings and global scaling as normalization method.
## GSM298223 The data were analyzed with Microarray Suite version 5.0 (MAS 5.0) using Affymetrix default analysis settings and global scaling as normalization method.
## GSM298227 The data were analyzed with Microarray Suite version 5.0 (MAS 5.0) using Affymetrix default analysis settings and global scaling as normalization method.
## GSM298228 The data were analyzed with Microarray Suite version 5.0 (MAS 5.0) using Affymetrix default analysis settings and global scaling as normalization method.
##           platform_id           contact_name           contact_email
## GSM190150      GPL570 Yael,,Strulovici-Barel yas2003@med.cornell.edu
## GSM190153      GPL570 Yael,,Strulovici-Barel yas2003@med.cornell.edu
## GSM254157      GPL570 Yael,,Strulovici-Barel yas2003@med.cornell.edu
## GSM298223      GPL570 Yael,,Strulovici-Barel yas2003@med.cornell.edu
## GSM298227      GPL570 Yael,,Strulovici-Barel yas2003@med.cornell.edu
## GSM298228      GPL570 Yael,,Strulovici-Barel yas2003@med.cornell.edu
##           contact_laboratory             contact_department
## GSM190150            Crystal Department of Genetic Medicine
## GSM190153            Crystal Department of Genetic Medicine
## GSM254157            Crystal Department of Genetic Medicine
## GSM298223            Crystal Department of Genetic Medicine
## GSM298227            Crystal Department of Genetic Medicine
## GSM298228            Crystal Department of Genetic Medicine
##                       contact_institute  contact_address contact_city
## GSM190150 Weill Cornell Medical College 1300 York Avenue     New York
## GSM190153 Weill Cornell Medical College 1300 York Avenue     New York
## GSM254157 Weill Cornell Medical College 1300 York Avenue     New York
## GSM298223 Weill Cornell Medical College 1300 York Avenue     New York
## GSM298227 Weill Cornell Medical College 1300 York Avenue     New York
## GSM298228 Weill Cornell Medical College 1300 York Avenue     New York
##           contact_state contact_zip/postal_code contact_country
## GSM190150            NY                   10021             USA
## GSM190153            NY                   10021             USA
## GSM254157            NY                   10021             USA
## GSM298223            NY                   10021             USA
## GSM298227            NY                   10021             USA
## GSM298228            NY                   10021             USA
##                                                                          supplementary_file
## GSM190150 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM190nnn/GSM190150/suppl/GSM190150.CEL.gz
## GSM190153 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM190nnn/GSM190153/suppl/GSM190153.CEL.gz
## GSM254157 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM254nnn/GSM254157/suppl/GSM254157.CEL.gz
## GSM298223 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM298nnn/GSM298223/suppl/GSM298223.CEL.gz
## GSM298227 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM298nnn/GSM298227/suppl/GSM298227.CEL.gz
## GSM298228 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM298nnn/GSM298228/suppl/GSM298228.CEL.gz
##                                                                        supplementary_file.1
## GSM190150 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM190nnn/GSM190150/suppl/GSM190150.CHP.gz
## GSM190153 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM190nnn/GSM190153/suppl/GSM190153.CHP.gz
## GSM254157 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM254nnn/GSM254157/suppl/GSM254157.CHP.gz
## GSM298223 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM298nnn/GSM298223/suppl/GSM298223.CHP.gz
## GSM298227 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM298nnn/GSM298227/suppl/GSM298227.CHP.gz
## GSM298228 ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM298nnn/GSM298228/suppl/GSM298228.CHP.gz
##           data_row_count                 relation               relation.1
## GSM190150          54675 Reanalyzed by: GSE119087                         
## GSM190153          54675  Reanalyzed by: GSE60486 Reanalyzed by: GSE119087
## GSM254157          54675  Reanalyzed by: GSE60486 Reanalyzed by: GSE119087
## GSM298223          54675                                                  
## GSM298227          54675 Reanalyzed by: GSE119087                         
## GSM298228          54675 Reanalyzed by: GSE119087                         
##           age:ch1 cilia length:ch1 copd status:ch1
## GSM190150      34             <NA>            <NA>
## GSM190153      45             <NA>            <NA>
## GSM254157      45             <NA>            <NA>
## GSM298223      38             <NA>            <NA>
## GSM298227      29             <NA>            <NA>
## GSM298228      39             <NA>            <NA>
##           department of genetic medicine id:ch1 ethnic group:ch1 ethnicity:ch1
## GSM190150                                  <NA>            black          <NA>
## GSM190153                                  <NA>         hispanic          <NA>
## GSM254157                                  <NA>            white          <NA>
## GSM298223                                  <NA>         hispanic          <NA>
## GSM298227                                  <NA>         hispanic          <NA>
## GSM298228                                  <NA>            asian          <NA>
##           serum 25-oh-d:ch1 sex:ch1    smoking status:ch1      group
## GSM190150              <NA>       M            non-smoker non.smoker
## GSM190153              <NA>       F            non-smoker non.smoker
## GSM254157              <NA>       M smoker, 23 pack-years     smoker
## GSM298223              <NA>       M            non-smoker non.smoker
## GSM298227              <NA>       M            non-smoker non.smoker
## GSM298228              <NA>       F            non-smoker non.smoker
# So I think the features I want to keep will be:
# Dates of submission/updates etc, sex, ethnicity, smoking status
# Keep the columns that might contain data of interest, which will need to be cleaned up.

# List of column names I want to keep and clean up into usable labels.
# The order matters: later steps rely on the derived table's column layout.
columns_to_find <- c(
  "geo_accession", "status", "submission_date", "last_update_date",
  "characteristics_ch1", "characteristics_ch1.1",
  "characteristics_ch1.2", "characteristics_ch1.3",
  "age:ch1", "cilia length:ch1", "ethnic group:ch1", "ethnicity:ch1",
  "serum 25-oh-d:ch1", "sex:ch1", "smoking status:ch1", "group"
)

# Get the column indexes, in the order listed above.
# Fail loudly if a column is absent instead of silently dropping it, which
# would shift every positional reference that follows.
indexes <- match(columns_to_find, names(phenotypic_data))
if (anyNA(indexes)) {
  stop(
    "Columns missing from phenotypic data: ",
    paste(columns_to_find[is.na(indexes)], collapse = ", "),
    call. = FALSE
  )
}

phenotypic_data <- phenotypic_data[, indexes]

# Now I need to parse out sex, ethnicity, smoking status, and age, vitamin D, pack years.

# Rename "group" as "smoking_status" (by name rather than by position)
names(phenotypic_data)[names(phenotypic_data) == "group"] <- "smoking_status"

## Grabbing ethnicity values from the columns ##
# Initialize a new column "ethnicity" with NA values
phenotypic_data$ethnicity <- NA

# Function to find 'eth' in a row and return the corresponding value
# Return every value in `row` that contains the text 'eth'.
# row: character vector holding one sample's values (as passed by apply()).
# Returns the matching element(s), names preserved, or NA if nothing matches.
find_ethnicity <- function(row) {
  is_match <- grepl('eth', row)
  if (!any(is_match)) {
    return(NA)
  }
  row[is_match]
}
# Apply the function row-wise to populate the "ethnicity" column
phenotypic_data$ethnicity <- apply(phenotypic_data, 1, find_ethnicity)

## Grabbing sex values from the columns ##
# Initialize a new column "sex" with NA values
phenotypic_data$sex <- NA

# Function to find 'sex' in a row and return the corresponding value
# Return every value in `row` that contains the text 'sex'.
# row: character vector holding one sample's values (as passed by apply()).
# Returns the matching element(s), names preserved, or NA if nothing matches.
find_sex <- function(row) {
  is_match <- grepl('sex', row)
  if (!any(is_match)) {
    return(NA)
  }
  row[is_match]
}
# Apply the function row-wise to populate the "sex" column
phenotypic_data$sex <- apply(phenotypic_data, 1, find_sex)


## Grabbing pack_years values from the columns ##
# Initialize a new column "pack_years" with NA values
phenotypic_data$pack_years <- NA

# Function to find 'pack_years' in a row and return the corresponding value, but just the first instance
# Return the first value in `row` that contains the text 'pack'.
# row: character vector holding one sample's values (as passed by apply()).
# Returns a single element (name preserved), or NA if nothing matches.
find_pack_years <- function(row) {
  first_hit <- match(TRUE, grepl('pack', row)) # NA when there is no match
  if (is.na(first_hit)) {
    return(NA)
  }
  row[first_hit]
}
# Apply the function row-wise to populate the "pack_years" column
phenotypic_data$pack_years <- apply(phenotypic_data, 1, find_pack_years)
#unlist the column
phenotypic_data$pack_years <- unlist(phenotypic_data$pack_years )



## Grabbing age values from the columns ##
# Initialize a new column "age" with NA values
phenotypic_data$age <- NA

# Function to find 'age' in a row and return the corresponding value
# Return every value in `row` that contains the text 'age'.
# row: character vector holding one sample's values (as passed by apply()).
# Returns the matching element(s), names preserved, or NA if nothing matches.
find_age <- function(row) {
  is_match <- grepl('age', row)
  if (!any(is_match)) {
    return(NA)
  }
  row[is_match]
}
# Apply the function row-wise to populate the "age" column
phenotypic_data$age <- apply(phenotypic_data, 1, find_age)


## Grabbing vitamin_d values from the columns ##
# Initialize a new column "vitamin_d" with NA values
phenotypic_data$vitamin_d <- NA

# Function to find 'vitamin_d' in a row and return the corresponding value, first instance
# Return the first value in `row` that contains the text 'vitamin'.
# row: character vector holding one sample's values (as passed by apply()).
# Returns a single element (name preserved), or NA if nothing matches.
find_vitamin_d <- function(row) {
  first_hit <- match(TRUE, grepl('vitamin', row)) # NA when there is no match
  if (is.na(first_hit)) {
    return(NA)
  }
  row[first_hit]
}
# Apply the function row-wise to populate the "vitamin_d" column
phenotypic_data$vitamin_d <- apply(phenotypic_data, 1, find_vitamin_d)

# vitamin_d has already been extracted by the step directly above, so the
# identical extraction is not repeated here.

## Grabbing cilia values from the columns ##
# Initialize a new column "cilia_length" with NA values
phenotypic_data$cilia_length <- NA

# Function to find 'cilia' in a row and return the corresponding value, first instance
# Return the first value in `row` that contains the text 'cilia'.
# row: character vector holding one sample's values (as passed by apply()).
# Returns a single element (name preserved), or NA if nothing matches.
find_cilia <- function(row) {
  first_hit <- match(TRUE, grepl('cilia', row)) # NA when there is no match
  if (is.na(first_hit)) {
    return(NA)
  }
  row[first_hit]
}
# Apply the function row-wise to populate the "cilia" column
phenotypic_data$cilia_length <- apply(phenotypic_data, 1, find_cilia)



## Now cut out the messy source columns
# Drop them by name rather than by position so the step keeps working if the
# column layout changes. These are the raw characteristics_* and *:ch1 fields
# whose contents have been copied into the tidy columns above.
messy_columns <- c(
  "characteristics_ch1", "characteristics_ch1.1",
  "characteristics_ch1.2", "characteristics_ch1.3",
  "age:ch1", "cilia length:ch1", "ethnic group:ch1", "ethnicity:ch1",
  "serum 25-oh-d:ch1", "sex:ch1", "smoking status:ch1"
)
phenotypic_data <- phenotypic_data[, !(names(phenotypic_data) %in% messy_columns)]

## Remove unnecessary prefix info
# Values look like "<label>: <value>"; keep only what follows the last ": ".
for (field in c("ethnicity", "age", "sex", "vitamin_d", "cilia_length")) {
  phenotypic_data[[field]] <- gsub(".*: ", "", phenotypic_data[[field]])
}

# Pack-years values look like "smoking status: smoker, 23 pack-years":
# keep what follows the last ", ", then drop the unit. The space that preceded
# the unit is left in place (e.g. "23 ").
phenotypic_data$pack_years <- gsub(".*, ", "", phenotypic_data$pack_years)
phenotypic_data$pack_years <- gsub("pack-years", "", phenotypic_data$pack_years)


# Reformat the submission dates to be sortable (ISO "YYYY-MM-DD" strings).
# Dates that are not in the lookup are left unchanged.

submission_date_lookup <- c(
  "May 17 2007" = "2007-05-17",
  "Jan 03 2008" = "2008-01-03", # was mistakenly mapped to 2008-01-08
  "Jun 13 2008" = "2008-06-13",
  "Jun 03 2010" = "2010-06-03",
  "Dec 20 2012" = "2012-12-20",
  "Jan 31 2013" = "2013-01-31",
  "Nov 08 2013" = "2013-11-08",
  "Nov 10 2014" = "2014-11-10"
)

phenotypic_data <- phenotypic_data %>%
  mutate(
    submission_date = ifelse(
      submission_date %in% names(submission_date_lookup),
      unname(submission_date_lookup[submission_date]),
      submission_date
    )
  )

1.3.2: Plot PCA and use phenotypic information to look for sources of batch effect/variation, and correct for these with ComBat

# assign samples to groups ("0" -> non.smoker, "1" -> smoker)
gs <- factor(sml)
groups <- make.names(c("non-smoker","smoker"))
levels(gs) <- groups
gset$group <- gs


## Plot PCA 1 ##
# The integer level codes double as plotting colours:
# 1 = first level (non.smoker), 2 = second level (smoker).
colz <- as.numeric(gs)

plotMDS(exprs(gset),
        gene.selection = "common",
        main = "PCA for CS vs NS GSE63127",
        col = colz,
        pch = 1
)
# Index the legend colours by factor level rather than by order of first
# appearance in colz, so each label is always paired with the colour that was
# actually used for its points.
legend("bottom", legend = levels(gs),
       fill = seq_along(levels(gs)),
       title = "Smoking status")

## We have 4 definite clusters that are not based on smoking status. 
## As such, it is a good idea to check the table of sample phenotypic information to look for sources of variation between samples.


# Point shapes from the date of submission, split at 2010-06-03.
# The dates are ISO-formatted strings, so string comparison orders them
# chronologically. With both groups present, FALSE (later than the cutoff)
# becomes 1 (circle) and TRUE (on or before the cutoff) becomes 2 (triangle).
submitted_by_cutoff <- phenotypic_data$submission_date <= "2010-06-03"
pointz <- as.numeric(as.factor(submitted_by_cutoff))

## Plot PCA with date information ##
plotMDS(
  exprs(gset),
  gene.selection = "common",
  main = "PCA for CS vs NS GSE63127",
  col = colz,  # smokers red, nonsmokers black
  pch = pointz # shape encodes the submission-date group
)
legend(
  "bottom",
  legend = c("Smokers", "Nonsmokers", "2010 and Prior", "Post-2010"),
  col = c("red", "black", "black", "black"), # colour only encodes smoking status
  pch = c(15, 15, 2, 1),                     # 15 = filled square, 2 = triangle, 1 = circle
  pt.cex = rep(1, 4),                        # symbol size
  text.col = "black"                         # text colour
  # add bty = "n" to remove the border around the legend
)

# Clearly the source of batch effect in PC1 is submission date post-2010.
# Note: I found that the split was at 2010 by doing a bit of playing around with other clustering methods, not shown here.

# First batch correction (submission date)
library(sva)
## Loading required package: mgcv
## Loading required package: nlme
## Warning: package 'nlme' was built under R version 4.3.3
## 
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
## 
##     collapse
## This is mgcv 1.9-1. For overview type 'help("mgcv-package")'.
## Loading required package: genefilter
## Loading required package: BiocParallel
library(limma)

# Making a batch vector from the submission date:
# batch 1 = submitted before 2012-01-01, batch 2 = submitted on or after it.
# Convert explicitly to Date instead of relying on an implicit
# character-vs-Date comparison, and stop early if any date fails to parse,
# since an NA batch label cannot be used for the correction.
submission_dates <- as.Date(phenotypic_data$submission_date)
stopifnot(
  "every sample needs a parseable submission date" = !anyNA(submission_dates)
)
submission_post_2010_batch <- ifelse(
  submission_dates < as.Date("2012-01-01"), 1, 2
)

# Adjust the expression matrix for submission date batch effect.
# NOTE(review): mod = NULL means smoking status is not supplied to ComBat as a
# covariate of interest - confirm this is intended.
exprs_matrix_combat <- ComBat(
  dat = exprs(gset),
  batch = submission_post_2010_batch,
  mod = NULL,
  par.prior = TRUE,
  prior.plots = FALSE
)
## Found2batches
## Adjusting for0covariate(s) or covariate level(s)
## Standardizing Data across genes
## Fitting L/S model and finding priors
## Finding parametric adjustments
## Adjusting the Data
## PCA of the expression values after the first (submission date) correction ##
# The object returned by plotMDS() is kept because its first dimension is
# used afterwards to define the second batch split.
date_corrected_PCA <- plotMDS(
  exprs_matrix_combat,
  gene.selection = "common",
  main = "PCA for CS vs NS GSE63127, corrected for submission date",
  col = colz,  # smokers red, nonsmokers black
  pch = pointz # shape encodes the submission-date group
)

## The second source of variation might be sex, but only 11/182 samples carry a sex label:
plotMDS(
  exprs_matrix_combat,
  gene.selection = "common",
  main = "PCA for CS vs NS GSE63127, corrected for submission date",
  col = colz,                  # smokers red, nonsmokers black
  labels = phenotypic_data$sex # draw the sex label instead of a point symbol
)
legend(
  "bottom",
  legend = c("M = Male", "F = Female", "Smoker", "Nonsmoker"),
  col = c("black", "black", "red", "black"),
  pch = c(NA, NA, 15, 15) # no symbol for the two text-label entries
)

## Samples are divided by sex, but 11/182 samples is not enough to draw a conclusion here.

## Second ComBat correction, for the remaining unexplained source of variation ##

# Batch labels come from the sign of the first MDS dimension (equivalent to
# PC1) of the date-corrected data: negative -> batch 1, otherwise -> batch 2.
# NOTE(review): these labels are derived from the expression data itself and
# smoking status is not passed to ComBat (mod = NULL) - confirm that this
# cannot remove or distort the smoking signal.
unknown_batch_labels <- ifelse(date_corrected_PCA$x < 0, 1, 2)

# Run the second batch correction on the already date-corrected matrix
exprs_matrix_combat_2 <- ComBat(
  dat = exprs_matrix_combat,
  batch = unknown_batch_labels,
  mod = NULL,
  par.prior = TRUE,
  prior.plots = FALSE
)
## Found2batches
## Adjusting for0covariate(s) or covariate level(s)
## Standardizing Data across genes
## Fitting L/S model and finding priors
## Finding parametric adjustments
## Adjusting the Data
# PCA plot of the expression values after both corrections
plotMDS(
  exprs_matrix_combat_2,
  gene.selection = "common",
  main = "PCA for CS vs NS GSE63127 after 2 ComBat corrections",
  col = colz,  # smokers red, nonsmokers black
  pch = pointz # shape encodes the submission-date group
)
legend(
  "topleft",
  legend = c("Smokers", "Nonsmokers", "2010 and Prior", "Post-2010"),
  col = c("red", "black", "black", "black"), # colour only encodes smoking status
  pch = c(15, 15, 2, 1),                     # 15 = filled square, 2 = triangle, 1 = circle
  pt.cex = rep(1, 4),                        # symbol size
  text.col = "black"                         # text colour
  # add bty = "n" to remove the border around the legend
)

## Now PC1 corresponds quite well to smoking status after the two ComBat corrections.

1.4 Differential expression analysis (limma with vooma)

# Finish setting up the design matrix: one column per group, no intercept
design <- model.matrix(~group + 0, gset)
colnames(design) <- levels(gs)


## Crucial bit: Replace the expression values in gset with the batch corrected ones ##
# NOTE(review): the two ComBat batches are not part of the design matrix;
# confirm that modelling pre-corrected data without them is intended.
exprs(gset) <- as.matrix(exprs_matrix_combat_2)

# calculate precision weights and show plot of mean-variance trend
v <- vooma(gset, design, plot = TRUE)

# OR weights by group
# v <- voomaByGroup(gset, group=groups, design, plot=T, cex=0.1, pch=".", col=1:nlevels(gs))
v$genes <- fData(gset) # attach gene annotations

# fit linear model
fit <- lmFit(v)

# set up contrasts of interest and recalculate model coefficients.
# The contrast is smoker minus non-smoker, so a positive logFC means higher
# expression in smokers.
cts <- paste(groups[2], groups[1], sep = "-")
cont.matrix <- makeContrasts(contrasts = cts, levels = design)
fit2 <- contrasts.fit(fit, cont.matrix)

# compute statistics and table of top significant genes.
# Arguments are named in full: `proportion` is the assumed fraction of
# differentially expressed genes, and `adjust.method` replaces the partially
# matched `adjust`.
fit2 <- eBayes(fit2, proportion = 0.01)
tT <- topTable(fit2, adjust.method = "fdr", sort.by = "B", number = Inf)

# keep only the columns used downstream
tT <- subset(tT, select = c("ID", "Gene.symbol", "logFC", "adj.P.Val"))

1.5 Basic filtering of DEGs (unlabelled, duplicates, FDR < 0.05)

# Basic clean-up of the full result table:
#   1. drop probesets without a gene symbol;
#   2. for probesets mapping to the same gene, keep the one with the lowest
#      FDR (ties are kept for now and checked afterwards).
# The FDR <= 0.05 filter is deliberately NOT applied at this stage.
GSE63127_CS_NS_GEO2R_limma_all <- tT %>%
  dplyr::filter(Gene.symbol != "") %>%
  dplyr::group_by(Gene.symbol) %>%
  dplyr::slice_min(order_by = adj.P.Val, n = 1, with_ties = TRUE) %>%
  dplyr::ungroup()
head(GSE63127_CS_NS_GEO2R_limma_all)
## # A tibble: 6 × 4
##   ID          Gene.symbol  logFC adj.P.Val
##   <chr>       <chr>        <dbl>     <dbl>
## 1 229819_at   A1BG        -0.106    0.481 
## 2 232462_s_at A1BG-AS1     0.531    0.0224
## 3 220951_s_at A1CF         0.302    0.123 
## 4 1558450_at  A2M          0.110    0.453 
## 5 1564139_at  A2M-AS1     -0.138    0.0724
## 6 1553505_at  A2ML1        0.145    0.636
# Significant subset: keep only genes with FDR at or below 0.05
GSE63127_CS_NS_GEO2R_limma_sig <- dplyr::filter(
  GSE63127_CS_NS_GEO2R_limma_all,
  adj.P.Val <= 0.05
)
head(GSE63127_CS_NS_GEO2R_limma_sig)
## # A tibble: 6 × 4
##   ID          Gene.symbol  logFC adj.P.Val
##   <chr>       <chr>        <dbl>     <dbl>
## 1 232462_s_at A1BG-AS1     0.531  2.24e- 2
## 2 218434_s_at AACS         0.128  2.82e- 3
## 3 223593_at   AADAT       -0.614  9.30e-19
## 4 202852_s_at AAGAB        0.179  3.13e- 3
## 5 225522_at   AAK1         0.344  3.86e-15
## 6 220268_at   AAMDC        0.657  1.81e- 2
# Check for ties: any gene symbol that still occurs more than once among the
# significant genes (checking from both ends flags every member of a group)
ties <- GSE63127_CS_NS_GEO2R_limma_sig %>%
  dplyr::filter(
    duplicated(Gene.symbol) | duplicated(Gene.symbol, fromLast = TRUE)
  )
print(ties) # No ties
## # A tibble: 0 × 4
## # ℹ 4 variables: ID <chr>, Gene.symbol <chr>, logFC <dbl>, adj.P.Val <dbl>
nrow(GSE63127_CS_NS_GEO2R_limma_sig)
## [1] 7105

1.6 Visualization of DEGs (volcano plot)

# Absolute log2 fold-change threshold used for the plot and its caption
log2FC_cutoff1 <- 0.2

# Number of significant (FDR <= 0.05) genes beyond the fold-change threshold
n_sig_above_cutoff <- nrow(
  GSE63127_CS_NS_GEO2R_limma_sig[
    abs(GSE63127_CS_NS_GEO2R_limma_sig$logFC) > log2FC_cutoff1,
  ]
)

# Volcano plot of all de-duplicated genes: x = log2 fold change, y = FDR
v1 <- EnhancedVolcano::EnhancedVolcano(
  toptable = GSE63127_CS_NS_GEO2R_limma_all,
  lab = GSE63127_CS_NS_GEO2R_limma_all$Gene.symbol,
  x = "logFC",
  y = "adj.P.Val",
  # pCutoffCol = 'min_smoothed_fdr',
  xlab = "log2FC",
  ylab = "-log10(FDR)",
  title = "A1 DEGs",
  subtitle = paste0("log2FC cutoff: ", log2FC_cutoff1),
  caption = paste0(
    "Total = ", n_sig_above_cutoff, " significant DEGs above log2FC cutoff"
  ),
  pCutoff = 0.05,
  FCcutoff = log2FC_cutoff1,
  col = c("grey30", "mediumpurple2", "royalblue", "orange2"),
  legendPosition = "bottom",
  labSize = 4,
  max.overlaps = 5,
  drawConnectors = TRUE,
  arrowheads = FALSE,
  gridlines.minor = FALSE,
  gridlines.major = FALSE,
  xlim = c(-3, 6)
)

# Print the plot
v1
## Warning: ggrepel: 4637 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

1.7 Save outputs

# Change date suffix as appropriate if changes are made
#write.table(GSE63127_CS_NS_GEO2R_limma_sig, "../2_Outputs/GSE63127_CS_NS_GEO2R_limma_sig_20241107.txt", sep = '\t')

1.8 Extra checks

2024/11/07: Comparing the DEG lists obtained with vs. without quantile normalization:

# Significant DEG table saved from the earlier run (file dated 2024-10-16),
# used here as the "with quantile normalization" list.
# The outputs are written tab-separated (see the write.table() call in
# "Save outputs"), so read with the same delimiter: read.table()'s default
# splits on any whitespace and would mis-split a field containing a space.
# NOTE(review): assumes the 20241016 file was written with sep = "\t" like the
# current write.table() call; confirm if that file came from an older script.
GSE63127_CS_NS_GEO2R_limma_sig_quantile <- read.table(
  "../2_Outputs/GSE63127_CS_NS_GEO2R_limma_sig_20241016.txt",
  header = TRUE,
  sep = "\t"
)
# Fail loudly if the file did not parse into the expected columns, because the
# comparison below relies on Gene.symbol.
stopifnot(
  "Gene.symbol" %in% colnames(GSE63127_CS_NS_GEO2R_limma_sig_quantile)
)
# Current result (no quantile normalization), aliased for the comparison
GSE63127_CS_NS_GEO2R_limma_sig_no_quantile <- GSE63127_CS_NS_GEO2R_limma_sig

# Compare results
library(VennDiagram)
## Loading required package: grid
## Loading required package: futile.logger
## 
## Attaching package: 'futile.logger'
## The following object is masked from 'package:mgcv':
## 
##     scat
# Build the two-set Venn diagram of gene symbols as a grid object
# (filename = NULL returns the grobs instead of writing an image file).
venn <- VennDiagram::venn.diagram(
  x = list(
    DEGs_no_quantile = GSE63127_CS_NS_GEO2R_limma_sig_no_quantile$Gene.symbol,
    DEGs_quantile = GSE63127_CS_NS_GEO2R_limma_sig_quantile$Gene.symbol
  ),
  filename = NULL
)
# Start a fresh graphics page and draw the diagram on it
grid::grid.newpage()
grid::grid.draw(venn)

The two lists agree quite well, and the list without quantile normalization is larger. Quantile normalization may over-normalize the data and mask some real variation, and the PCA plots still look good without it, so I will go forward without quantile normalization. The same choice will have to be applied to all the other airway datasets used in the meta-analysis.

2. Differential expression analysis of TCGA-LUAD tumor vs normal tissue (T-E)

2.1 Loading dataset, formatting as tumor-normal pairs

library(TCGAbiolinks)
library(SummarizedExperiment)
## Warning: package 'SummarizedExperiment' was built under R version 4.3.2
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## Warning: package 'matrixStats' was built under R version 4.3.3
## 
## Attaching package: 'matrixStats'
## The following objects are masked from 'package:genefilter':
## 
##     rowSds, rowVars
## The following object is masked from 'package:dplyr':
## 
##     count
## The following objects are masked from 'package:Biobase':
## 
##     anyMissing, rowMedians
## 
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## The following objects are masked from 'package:genefilter':
## 
##     rowSds, rowVars
## The following object is masked from 'package:Biobase':
## 
##     rowMedians
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: S4Vectors
## Warning: package 'S4Vectors' was built under R version 4.3.2
## 
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:dplyr':
## 
##     first, rename
## The following object is masked from 'package:utils':
## 
##     findMatches
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## Loading required package: IRanges
## 
## Attaching package: 'IRanges'
## The following object is masked from 'package:nlme':
## 
##     collapse
## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice
## Loading required package: GenomeInfoDb
## Warning: package 'GenomeInfoDb' was built under R version 4.3.3
# dplyr is already attached at this point (the masking messages printed while
# loading SummarizedExperiment refer to 'package:dplyr'), so this call does
# not put it back in front of those packages. count, first, rename, collapse,
# desc and slice are reported as masked above; call them as dplyr::<name>()
# from here on.
library(dplyr)

# GDC query: TCGA-LUAD STAR count files for primary tumor and solid tissue
# normal samples.
query <- GDCquery(
  project = "TCGA-LUAD",
  data.category = "Transcriptome Profiling",
  data.type = "Gene Expression Quantification",
  workflow.type = "STAR - Counts",
  sample.type = c("Primary Tumor", "Solid Tissue Normal")
)
## --------------------------------------
## o GDCquery: Searching in GDC database
## --------------------------------------
## Genome of reference: hg38
## --------------------------------------------
## oo Accessing GDC. This might take a while...
## --------------------------------------------
## ooo Project: TCGA-LUAD
## --------------------
## oo Filtering results
## --------------------
## ooo By data.type
## ooo By workflow.type
## ooo By sample.type
## ----------------
## oo Checking data
## ----------------
## ooo Checking if there are duplicated cases
## ooo Checking if there are results for the query
## -------------------
## o Preparing output
## -------------------
# Fetch the files matched by the query; files that already exist locally are
# reported as such and not downloaded again.
TCGAbiolinks::GDCdownload(query = query)
## Downloading data for project TCGA-LUAD
## Of the 598 files for download 598 already exist.
## All samples have been already downloaded
# Read the downloaded files for the query into a single R object.
# NOTE(review): presumably a SummarizedExperiment (SummarizedExperiment is
# loaded above) - confirm before relying on assay()/colData().
data <- TCGAbiolinks::GDCprepare(query = query)
## |                                                    |  0%                      |                                                    |0.1672241% ~58 s remaining|                                                    |0.3344482% ~36 s remaining|                                                    |0.5016722% ~28 s remaining|                                                    |0.6688963% ~24 s remaining|                                                    |0.8361204% ~24 s remaining|                                                    |1.003344% ~22 s remaining |                                                    |1.170569% ~21 s remaining |                                                    |1.337793% ~20 s remaining |                                                    |1.505017% ~19 s remaining |                                                    |1.672241% ~18 s remaining |                                                    |1.839465% ~17 s remaining |=                                                   |2.006689% ~17 s remaining |=                                                   |2.173913% ~16 s remaining |=                                                   |2.341137% ~16 s remaining |=                                                   |2.508361% ~16 s remaining |=                                                   |2.675585% ~16 s remaining |=                                                   |2.842809% ~15 s remaining |=                                                   |3.010033% ~15 s remaining |=                                                   |3.177258% ~15 s remaining |=                                                   |3.344482% ~15 s remaining |=                                                   |3.511706% ~15 s remaining |=                                                   |3.67893% ~15 s remaining  |==                                                  |3.846154% ~14 s remaining |==                                                  |4.013378% ~14 s 
remaining |==                                                  |4.180602% ~14 s remaining |==                                                  |4.347826% ~14 s remaining |==                                                  |4.51505% ~14 s remaining  |==                                                  |4.682274% ~14 s remaining |==                                                  |4.849498% ~14 s remaining |==                                                  |5.016722% ~14 s remaining |==                                                  |5.183946% ~14 s remaining |==                                                  |5.351171% ~14 s remaining |==                                                  |5.518395% ~14 s remaining |==                                                  |5.685619% ~14 s remaining |===                                                 |5.852843% ~14 s remaining |===                                                 |6.020067% ~13 s remaining |===                                                 |6.187291% ~13 s remaining |===                                                 |6.354515% ~13 s remaining |===                                                 |6.521739% ~13 s remaining |===                                                 |6.688963% ~13 s remaining |===                                                 |6.856187% ~13 s remaining |===                                                 |7.023411% ~13 s remaining |===                                                 |7.190635% ~13 s remaining |===                                                 |7.35786% ~13 s remaining  |===                                                 |7.525084% ~13 s remaining |====                                                |7.692308% ~13 s remaining |====                                                |7.859532% ~13 s remaining |====                                                |8.026756% ~13 s remaining |====                                                |8.19398% ~13 s 
remaining  |====                                                |8.361204% ~13 s remaining |====                                                |8.528428% ~13 s remaining |====                                                |8.695652% ~13 s remaining |====                                                |8.862876% ~13 s remaining |====                                                |9.0301% ~13 s remaining   |====                                                |9.197324% ~13 s remaining |====                                                |9.364548% ~13 s remaining |====                                                |9.531773% ~13 s remaining |=====                                               |9.698997% ~13 s remaining |=====                                               |9.866221% ~13 s remaining |=====                                               |10.03344% ~13 s remaining |=====                                               |10.20067% ~13 s remaining |=====                                               |10.36789% ~13 s remaining |=====                                               |10.53512% ~12 s remaining |=====                                               |10.70234% ~12 s remaining |=====                                               |10.86957% ~12 s remaining |=====                                               |11.03679% ~12 s remaining |=====                                               |11.20401% ~12 s remaining |=====                                               |11.37124% ~12 s remaining |======                                              |11.53846% ~12 s remaining |======                                              |11.70569% ~12 s remaining |======                                              |11.87291% ~12 s remaining |======                                              |12.04013% ~12 s remaining |======                                              |12.20736% ~12 s remaining |======                                              |12.37458% ~12 
s remaining |======                                              |12.54181% ~12 s remaining |======                                              |12.70903% ~12 s remaining |======                                              |12.87625% ~12 s remaining |======                                              |13.04348% ~12 s remaining |======                                              |13.2107% ~12 s remaining  |======                                              |13.37793% ~12 s remaining |=======                                             |13.54515% ~12 s remaining |=======                                             |13.71237% ~12 s remaining |=======                                             |13.8796% ~12 s remaining  |=======                                             |14.04682% ~12 s remaining |=======                                             |14.21405% ~12 s remaining |=======                                             |14.38127% ~12 s remaining |=======                                             |14.54849% ~12 s remaining |=======                                             |14.71572% ~12 s remaining |=======                                             |14.88294% ~12 s remaining |=======                                             |15.05017% ~12 s remaining |=======                                             |15.21739% ~12 s remaining |========                                            |15.38462% ~12 s remaining |========                                            |15.55184% ~12 s remaining |========                                            |15.71906% ~12 s remaining |========                                            |15.88629% ~12 s remaining |========                                            |16.05351% ~12 s remaining |========                                            |16.22074% ~12 s remaining |========                                            |16.38796% ~12 s remaining |========                                            |16.55518% ~11 
s remaining |========                                            |16.72241% ~11 s remaining |========                                            |16.88963% ~11 s remaining |========                                            |17.05686% ~11 s remaining |========                                            |17.22408% ~11 s remaining |=========                                           |17.3913% ~11 s remaining  |=========                                           |17.55853% ~11 s remaining |=========                                           |17.72575% ~11 s remaining |=========                                           |17.89298% ~11 s remaining |=========                                           |18.0602% ~11 s remaining  |=========                                           |18.22742% ~11 s remaining |=========                                           |18.39465% ~11 s remaining |=========                                           |18.56187% ~11 s remaining |=========                                           |18.7291% ~11 s remaining  |=========                                           |18.89632% ~11 s remaining |=========                                           |19.06355% ~11 s remaining |==========                                          |19.23077% ~11 s remaining |==========                                          |19.39799% ~11 s remaining |==========                                          |19.56522% ~11 s remaining |==========                                          |19.73244% ~11 s remaining |==========                                          |19.89967% ~11 s remaining |==========                                          |20.06689% ~11 s remaining |==========                                          |20.23411% ~11 s remaining |==========                                          |20.40134% ~11 s remaining |==========                                          |20.56856% ~11 s remaining |==========                                          |20.73579% ~11 
s remaining |==========                                          |20.90301% ~11 s remaining |==========                                          |21.07023% ~11 s remaining |===========                                         |21.23746% ~11 s remaining |===========                                         |21.40468% ~11 s remaining |===========                                         |21.57191% ~11 s remaining |===========                                         |21.73913% ~11 s remaining |===========                                         |21.90635% ~11 s remaining |===========                                         |22.07358% ~11 s remaining |===========                                         |22.2408% ~11 s remaining  |===========                                         |22.40803% ~11 s remaining |===========                                         |22.57525% ~11 s remaining |===========                                         |22.74247% ~11 s remaining |===========                                         |22.9097% ~11 s remaining  |============                                        |23.07692% ~11 s remaining |============                                        |23.24415% ~11 s remaining |============                                        |23.41137% ~10 s remaining |============                                        |23.5786% ~10 s remaining  |============                                        |23.74582% ~10 s remaining |============                                        |23.91304% ~10 s remaining |============                                        |24.08027% ~10 s remaining |============                                        |24.24749% ~10 s remaining |============                                        |24.41472% ~10 s remaining |============                                        |24.58194% ~10 s remaining |============                                        |24.74916% ~10 s remaining |============                                        |24.91639% ~10 
s remaining |=============                                       |25.08361% ~10 s remaining |=============                                       |25.25084% ~10 s remaining |=============                                       |25.41806% ~10 s remaining |=============                                       |25.58528% ~10 s remaining |=============                                       |25.75251% ~10 s remaining |=============                                       |25.91973% ~10 s remaining |=============                                       |26.08696% ~10 s remaining |=============                                       |26.25418% ~10 s remaining |=============                                       |26.4214% ~10 s remaining  |=============                                       |26.58863% ~10 s remaining |=============                                       |26.75585% ~10 s remaining |==============                                      |26.92308% ~10 s remaining |==============                                      |27.0903% ~10 s remaining  |==============                                      |27.25753% ~10 s remaining |==============                                      |27.42475% ~10 s remaining |==============                                      |27.59197% ~10 s remaining |==============                                      |27.7592% ~10 s remaining  |==============                                      |27.92642% ~10 s remaining |==============                                      |28.09365% ~10 s remaining |==============                                      |28.26087% ~10 s remaining |==============                                      |28.42809% ~10 s remaining |==============                                      |28.59532% ~10 s remaining |==============                                      |28.76254% ~10 s remaining |===============                                     |28.92977% ~10 s remaining |===============                                     |29.09699% ~10 
s remaining |===============                                     |29.26421% ~10 s remaining |===============                                     |29.43144% ~10 s remaining |===============                                     |29.59866% ~10 s remaining |===============                                     |29.76589% ~10 s remaining |===============                                     |29.93311% ~10 s remaining |===============                                     |30.10033% ~10 s remaining |===============                                     |30.26756% ~10 s remaining |===============                                     |30.43478% ~10 s remaining |===============                                     |30.60201% ~9 s remaining  |================                                    |30.76923% ~9 s remaining  |================                                    |30.93645% ~9 s remaining  |================                                    |31.10368% ~9 s remaining  |================                                    |31.2709% ~9 s remaining   |================                                    |31.43813% ~9 s remaining  |================                                    |31.60535% ~9 s remaining  |================                                    |31.77258% ~9 s remaining  |================                                    |31.9398% ~9 s remaining   |================                                    |32.10702% ~9 s remaining  |================                                    |32.27425% ~9 s remaining  |================                                    |32.44147% ~9 s remaining  |================                                    |32.6087% ~9 s remaining   |=================                                   |32.77592% ~9 s remaining  |=================                                   |32.94314% ~9 s remaining  |=================                                   |33.11037% ~9 s remaining  |=================                                   |33.27759% ~9 
s remaining  |=================                                   |33.44482% ~9 s remaining  |=================                                   |33.61204% ~9 s remaining  |=================                                   |33.77926% ~9 s remaining  |=================                                   |33.94649% ~9 s remaining  |=================                                   |34.11371% ~9 s remaining  |=================                                   |34.28094% ~9 s remaining  |=================                                   |34.44816% ~9 s remaining  |==================                                  |34.61538% ~9 s remaining  |==================                                  |34.78261% ~9 s remaining  |==================                                  |34.94983% ~9 s remaining  |==================                                  |35.11706% ~9 s remaining  |==================                                  |35.28428% ~9 s remaining  |==================                                  |35.45151% ~9 s remaining  |==================                                  |35.61873% ~9 s remaining  |==================                                  |35.78595% ~9 s remaining  |==================                                  |35.95318% ~9 s remaining  |==================                                  |36.1204% ~9 s remaining   |==================                                  |36.28763% ~9 s remaining  |==================                                  |36.45485% ~9 s remaining  |===================                                 |36.62207% ~9 s remaining  |===================                                 |36.7893% ~9 s remaining   |===================                                 |36.95652% ~9 s remaining  |===================                                 |37.12375% ~9 s remaining  |===================                                 |37.29097% ~9 s remaining  |===================                                 |37.45819% ~9 
s remaining  |===================                                 |37.62542% ~9 s remaining  |===================                                 |37.79264% ~8 s remaining  |===================                                 |37.95987% ~8 s remaining  |===================                                 |38.12709% ~8 s remaining  |===================                                 |38.29431% ~8 s remaining  |====================                                |38.46154% ~8 s remaining  |====================                                |38.62876% ~8 s remaining  |====================                                |38.79599% ~8 s remaining  |====================                                |38.96321% ~8 s remaining  |====================                                |39.13043% ~8 s remaining  |====================                                |39.29766% ~8 s remaining  |====================                                |39.46488% ~8 s remaining  |====================                                |39.63211% ~8 s remaining  |====================                                |39.79933% ~8 s remaining  |====================                                |39.96656% ~8 s remaining  |====================                                |40.13378% ~8 s remaining  |====================                                |40.301% ~8 s remaining    |=====================                               |40.46823% ~8 s remaining  |=====================                               |40.63545% ~8 s remaining  |=====================                               |40.80268% ~8 s remaining  |=====================                               |40.9699% ~8 s remaining   |=====================                               |41.13712% ~8 s remaining  |=====================                               |41.30435% ~8 s remaining  |=====================                               |41.47157% ~8 s remaining  |=====================                               |41.6388% ~8 
s remaining   |=====================                               |41.80602% ~8 s remaining  |=====================                               |41.97324% ~8 s remaining  |=====================                               |42.14047% ~8 s remaining  |======================                              |42.30769% ~8 s remaining  |======================                              |42.47492% ~8 s remaining  |======================                              |42.64214% ~8 s remaining  |======================                              |42.80936% ~8 s remaining  |======================                              |42.97659% ~8 s remaining  |======================                              |43.14381% ~8 s remaining  |======================                              |43.31104% ~8 s remaining  |======================                              |43.47826% ~8 s remaining  |======================                              |43.64548% ~8 s remaining  |======================                              |43.81271% ~8 s remaining  |======================                              |43.97993% ~8 s remaining  |======================                              |44.14716% ~8 s remaining  |=======================                             |44.31438% ~8 s remaining  |=======================                             |44.48161% ~8 s remaining  |=======================                             |44.64883% ~8 s remaining  |=======================                             |44.81605% ~8 s remaining  |=======================                             |44.98328% ~7 s remaining  |=======================                             |45.1505% ~7 s remaining   |=======================                             |45.31773% ~7 s remaining  |=======================                             |45.48495% ~7 s remaining  |=======================                             |45.65217% ~7 s remaining  |=======================                             |45.8194% ~7 
s remaining   |=======================                             |45.98662% ~7 s remaining  |========================                            |46.15385% ~7 s remaining  |========================                            |46.32107% ~7 s remaining  |========================                            |46.48829% ~7 s remaining  |========================                            |46.65552% ~7 s remaining  |========================                            |46.82274% ~7 s remaining  |========================                            |46.98997% ~7 s remaining  |========================                            |47.15719% ~7 s remaining  |========================                            |47.32441% ~7 s remaining  |========================                            |47.49164% ~7 s remaining  |========================                            |47.65886% ~7 s remaining  |========================                            |47.82609% ~7 s remaining  |========================                            |47.99331% ~7 s remaining  |=========================                           |48.16054% ~7 s remaining  |=========================                           |48.32776% ~7 s remaining  |=========================                           |48.49498% ~7 s remaining  |=========================                           |48.66221% ~7 s remaining  |=========================                           |48.82943% ~7 s remaining  |=========================                           |48.99666% ~7 s remaining  |=========================                           |49.16388% ~7 s remaining  |=========================                           |49.3311% ~7 s remaining   |=========================                           |49.49833% ~7 s remaining  |=========================                           |49.66555% ~7 s remaining  |=========================                           |49.83278% ~7 s remaining  |==========================                          | 50% ~7 s 
remaining       |==========================                          |50.16722% ~7 s remaining  |==========================                          |50.33445% ~7 s remaining  |==========================                          |50.50167% ~7 s remaining  |==========================                          |50.6689% ~7 s remaining   |==========================                          |50.83612% ~7 s remaining  |==========================                          |51.00334% ~7 s remaining  |==========================                          |51.17057% ~7 s remaining  |==========================                          |51.33779% ~7 s remaining  |==========================                          |51.50502% ~7 s remaining  |==========================                          |51.67224% ~7 s remaining  |==========================                          |51.83946% ~7 s remaining  |===========================                         |52.00669% ~7 s remaining  |===========================                         |52.17391% ~7 s remaining  |===========================                         |52.34114% ~6 s remaining  |===========================                         |52.50836% ~6 s remaining  |===========================                         |52.67559% ~6 s remaining  |===========================                         |52.84281% ~6 s remaining  |===========================                         |53.01003% ~6 s remaining  |===========================                         |53.17726% ~6 s remaining  |===========================                         |53.34448% ~6 s remaining  |===========================                         |53.51171% ~6 s remaining  |===========================                         |53.67893% ~6 s remaining  |============================                        |53.84615% ~6 s remaining  |============================                        |54.01338% ~6 s remaining  |============================                        |54.1806% 
~6 s remaining   |============================                        |54.34783% ~6 s remaining  |============================                        |54.51505% ~6 s remaining  |============================                        |54.68227% ~6 s remaining  |============================                        |54.8495% ~6 s remaining   |============================                        |55.01672% ~6 s remaining  |============================                        |55.18395% ~6 s remaining  |============================                        |55.35117% ~6 s remaining  |============================                        |55.51839% ~6 s remaining  |============================                        |55.68562% ~6 s remaining  |=============================                       |55.85284% ~6 s remaining  |=============================                       |56.02007% ~6 s remaining  |=============================                       |56.18729% ~6 s remaining  |=============================                       |56.35452% ~6 s remaining  |=============================                       |56.52174% ~6 s remaining  |=============================                       |56.68896% ~6 s remaining  |=============================                       |56.85619% ~6 s remaining  |=============================                       |57.02341% ~6 s remaining  |=============================                       |57.19064% ~6 s remaining  |=============================                       |57.35786% ~6 s remaining  |=============================                       |57.52508% ~6 s remaining  |=============================                       |57.69231% ~6 s remaining  |==============================                      |57.85953% ~6 s remaining  |==============================                      |58.02676% ~6 s remaining  |==============================                      |58.19398% ~6 s remaining  |==============================                      |58.3612% 
~6 s remaining   |==============================                      |58.52843% ~6 s remaining  |==============================                      |58.69565% ~6 s remaining  |==============================                      |58.86288% ~6 s remaining  |==============================                      |59.0301% ~6 s remaining   |==============================                      |59.19732% ~6 s remaining  |==============================                      |59.36455% ~5 s remaining  |==============================                      |59.53177% ~5 s remaining  |===============================                     |59.699% ~5 s remaining    |===============================                     |59.86622% ~5 s remaining  |===============================                     |60.03344% ~5 s remaining  |===============================                     |60.20067% ~5 s remaining  |===============================                     |60.36789% ~5 s remaining  |===============================                     |60.53512% ~5 s remaining  |===============================                     |60.70234% ~5 s remaining  |===============================                     |60.86957% ~5 s remaining  |===============================                     |61.03679% ~5 s remaining  |===============================                     |61.20401% ~5 s remaining  |===============================                     |61.37124% ~5 s remaining  |================================                    |61.53846% ~5 s remaining  |================================                    |61.70569% ~5 s remaining  |================================                    |61.87291% ~5 s remaining  |================================                    |62.04013% ~5 s remaining  |================================                    |62.20736% ~5 s remaining  |================================                    |62.37458% ~5 s remaining  |================================                    
|62.54181% ~5 s remaining  |================================                    |62.70903% ~5 s remaining  |================================                    |62.87625% ~5 s remaining  |================================                    |63.04348% ~5 s remaining  |================================                    |63.2107% ~5 s remaining   |================================                    |63.37793% ~5 s remaining  |=================================                   |63.54515% ~5 s remaining  |=================================                   |63.71237% ~5 s remaining  |=================================                   |63.8796% ~5 s remaining   |=================================                   |64.04682% ~5 s remaining  |=================================                   |64.21405% ~5 s remaining  |=================================                   |64.38127% ~5 s remaining  |=================================                   |64.54849% ~5 s remaining  |=================================                   |64.71572% ~5 s remaining  |=================================                   |64.88294% ~5 s remaining  |=================================                   |65.05017% ~5 s remaining  |=================================                   |65.21739% ~5 s remaining  |==================================                  |65.38462% ~5 s remaining  |==================================                  |65.55184% ~5 s remaining  |==================================                  |65.71906% ~5 s remaining  |==================================                  |65.88629% ~5 s remaining  |==================================                  |66.05351% ~5 s remaining  |==================================                  |66.22074% ~5 s remaining  |==================================                  |66.38796% ~5 s remaining  |==================================                  |66.55518% ~5 s remaining  |==================================                  
|66.72241% ~4 s remaining  |==================================                  |66.88963% ~4 s remaining  |==================================                  |67.05686% ~4 s remaining  |==================================                  |67.22408% ~4 s remaining  |===================================                 |67.3913% ~4 s remaining   |===================================                 |67.55853% ~4 s remaining  |===================================                 |67.72575% ~4 s remaining  |===================================                 |67.89298% ~4 s remaining  |===================================                 |68.0602% ~4 s remaining   |===================================                 |68.22742% ~4 s remaining  |===================================                 |68.39465% ~4 s remaining  |===================================                 |68.56187% ~4 s remaining  |===================================                 |68.7291% ~4 s remaining   |===================================                 |68.89632% ~4 s remaining  |===================================                 |69.06355% ~4 s remaining  |====================================                |69.23077% ~4 s remaining  |====================================                |69.39799% ~4 s remaining  |====================================                |69.56522% ~4 s remaining  |====================================                |69.73244% ~4 s remaining  |====================================                |69.89967% ~4 s remaining  |====================================                |70.06689% ~4 s remaining  |====================================                |70.23411% ~4 s remaining  |====================================                |70.40134% ~4 s remaining  |====================================                |70.56856% ~4 s remaining  |====================================                |70.73579% ~4 s remaining  |====================================                
|70.90301% ~4 s remaining  |====================================                |71.07023% ~4 s remaining  |=====================================               |71.23746% ~4 s remaining  |=====================================               |71.40468% ~4 s remaining  |=====================================               |71.57191% ~4 s remaining  |=====================================               |71.73913% ~4 s remaining  |=====================================               |71.90635% ~4 s remaining  |=====================================               |72.07358% ~4 s remaining  |=====================================               |72.2408% ~4 s remaining   |=====================================               |72.40803% ~4 s remaining  |=====================================               |72.57525% ~4 s remaining  |=====================================               |72.74247% ~4 s remaining  |=====================================               |72.9097% ~4 s remaining   |======================================              |73.07692% ~4 s remaining  |======================================              |73.24415% ~4 s remaining  |======================================              |73.41137% ~4 s remaining  |======================================              |73.5786% ~4 s remaining   |======================================              |73.74582% ~4 s remaining  |======================================              |73.91304% ~4 s remaining  |======================================              |74.08027% ~4 s remaining  |======================================              |74.24749% ~4 s remaining  |======================================              |74.41472% ~4 s remaining  |======================================              |74.58194% ~4 s remaining  |======================================              |74.74916% ~3 s remaining  |======================================              |74.91639% ~3 s remaining  |=======================================             
|75.08361% ~3 s remaining  |=======================================             |75.25084% ~3 s remaining  |=======================================             |75.41806% ~3 s remaining  |=======================================             |75.58528% ~4 s remaining  |=======================================             |75.75251% ~4 s remaining  |=======================================             |75.91973% ~4 s remaining  |=======================================             |76.08696% ~4 s remaining  |=======================================             |76.25418% ~3 s remaining  |=======================================             |76.4214% ~3 s remaining   |=======================================             |76.58863% ~3 s remaining  |=======================================             |76.75585% ~3 s remaining  |========================================            |76.92308% ~3 s remaining  |========================================            |77.0903% ~3 s remaining   |========================================            |77.25753% ~3 s remaining  |========================================            |77.42475% ~3 s remaining  |========================================            |77.59197% ~3 s remaining  |========================================            |77.7592% ~3 s remaining   |========================================            |77.92642% ~3 s remaining  |========================================            |78.09365% ~3 s remaining  |========================================            |78.26087% ~3 s remaining  |========================================            |78.42809% ~3 s remaining  |========================================            |78.59532% ~3 s remaining  |========================================            |78.76254% ~3 s remaining  |=========================================           |78.92977% ~3 s remaining  |=========================================           |79.09699% ~3 s remaining  |=========================================           
|79.26421% ~3 s remaining  |=========================================           |79.43144% ~3 s remaining  |=========================================           |79.59866% ~3 s remaining  |=========================================           |79.76589% ~3 s remaining  |=========================================           |79.93311% ~3 s remaining  |=========================================           |80.10033% ~3 s remaining  |=========================================           |80.26756% ~3 s remaining  |=========================================           |80.43478% ~3 s remaining  |=========================================           |80.60201% ~3 s remaining  |==========================================          |80.76923% ~3 s remaining  |==========================================          |80.93645% ~3 s remaining  |==========================================          |81.10368% ~3 s remaining  |==========================================          |81.2709% ~3 s remaining   |==========================================          |81.43813% ~3 s remaining  |==========================================          |81.60535% ~3 s remaining  |==========================================          |81.77258% ~3 s remaining  |==========================================          |81.9398% ~3 s remaining   |==========================================          |82.10702% ~3 s remaining  |==========================================          |82.27425% ~3 s remaining  |==========================================          |82.44147% ~3 s remaining  |==========================================          |82.6087% ~3 s remaining   |===========================================         |82.77592% ~3 s remaining  |===========================================         |82.94314% ~2 s remaining  |===========================================         |83.11037% ~2 s remaining  |===========================================         |83.27759% ~2 s remaining  |===========================================         
|83.44482% ~2 s remaining  |===========================================         |83.61204% ~2 s remaining  |===========================================         |83.77926% ~2 s remaining  |===========================================         |83.94649% ~2 s remaining  |===========================================         |84.11371% ~2 s remaining  |===========================================         |84.28094% ~2 s remaining  |===========================================         |84.44816% ~2 s remaining  |============================================        |84.61538% ~2 s remaining  |============================================        |84.78261% ~2 s remaining  |============================================        |84.94983% ~2 s remaining  |============================================        |85.11706% ~2 s remaining  |============================================        |85.28428% ~2 s remaining  |============================================        |85.45151% ~2 s remaining  |============================================        |85.61873% ~2 s remaining  |============================================        |85.78595% ~2 s remaining  |============================================        |85.95318% ~2 s remaining  |============================================        |86.1204% ~2 s remaining   |============================================        |86.28763% ~2 s remaining  |============================================        |86.45485% ~2 s remaining  |=============================================       |86.62207% ~2 s remaining  |=============================================       |86.7893% ~2 s remaining   |=============================================       |86.95652% ~2 s remaining  |=============================================       |87.12375% ~2 s remaining  |=============================================       |87.29097% ~2 s remaining  |=============================================       |87.45819% ~2 s remaining  |=============================================       
|87.62542% ~2 s remaining  |=============================================       |87.79264% ~2 s remaining  |=============================================       |87.95987% ~2 s remaining  |=============================================       |88.12709% ~2 s remaining  |=============================================       |88.29431% ~2 s remaining  |==============================================      |88.46154% ~2 s remaining  |==============================================      |88.62876% ~2 s remaining  |==============================================      |88.79599% ~2 s remaining  |==============================================      |88.96321% ~2 s remaining  |==============================================      |89.13043% ~2 s remaining  |==============================================      |89.29766% ~2 s remaining  |==============================================      |89.46488% ~2 s remaining  |==============================================      |89.63211% ~1 s remaining  |==============================================      |89.79933% ~1 s remaining  |==============================================      |89.96656% ~1 s remaining  |==============================================      |90.13378% ~1 s remaining  |==============================================      |90.301% ~1 s remaining    |===============================================     |90.46823% ~1 s remaining  |===============================================     |90.63545% ~1 s remaining  |===============================================     |90.80268% ~1 s remaining  |===============================================     |90.9699% ~1 s remaining   |===============================================     |91.13712% ~1 s remaining  |===============================================     |91.30435% ~1 s remaining  |===============================================     |91.47157% ~1 s remaining  |===============================================     |91.6388% ~1 s remaining   |===============================================     
|91.80602% ~1 s remaining  |===============================================     |91.97324% ~1 s remaining  |===============================================     |92.14047% ~1 s remaining  |================================================    |92.30769% ~1 s remaining  |================================================    |92.47492% ~1 s remaining  |================================================    |92.64214% ~1 s remaining  |================================================    |92.80936% ~1 s remaining  |================================================    |92.97659% ~1 s remaining  |================================================    |93.14381% ~1 s remaining  |================================================    |93.31104% ~1 s remaining  |================================================    |93.47826% ~1 s remaining  |================================================    |93.64548% ~1 s remaining  |================================================    |93.81271% ~1 s remaining  |================================================    |93.97993% ~1 s remaining  |================================================    |94.14716% ~1 s remaining  |=================================================   |94.31438% ~1 s remaining  |=================================================   |94.48161% ~1 s remaining  |=================================================   |94.64883% ~1 s remaining  |=================================================   |94.81605% ~1 s remaining  |=================================================   |94.98328% ~1 s remaining  |=================================================   |95.1505% ~1 s remaining   |=================================================   |95.31773% ~1 s remaining  |=================================================   |95.48495% ~1 s remaining  |=================================================   |95.65217% ~1 s remaining  |=================================================   |95.8194% ~1 s remaining   |=================================================   
|95.98662% ~1 s remaining  |==================================================  |96.15385% ~1 s remaining  |==================================================  |96.32107% ~1 s remaining  |==================================================  |96.48829% ~1 s remaining  |==================================================  |96.65552% ~1 s remaining  |==================================================  |96.82274% ~0 s remaining  |==================================================  |96.98997% ~0 s remaining  |==================================================  |97.15719% ~0 s remaining  |==================================================  |97.32441% ~0 s remaining  |==================================================  |97.49164% ~0 s remaining  |==================================================  |97.65886% ~0 s remaining  |==================================================  |97.82609% ~0 s remaining  |==================================================  |97.99331% ~0 s remaining  |=================================================== |98.16054% ~0 s remaining  |=================================================== |98.32776% ~0 s remaining  |=================================================== |98.49498% ~0 s remaining  |=================================================== |98.66221% ~0 s remaining  |=================================================== |98.82943% ~0 s remaining  |=================================================== |98.99666% ~0 s remaining  |=================================================== |99.16388% ~0 s remaining  |=================================================== |99.3311% ~0 s remaining   |=================================================== |99.49833% ~0 s remaining  |=================================================== |99.66555% ~0 s remaining  |=================================================== |99.83278% ~0 s remaining  |====================================================|100% ~0 s remaining       
|====================================================|100%                      Completed after 15 s
## Starting to add information to samples
##  => Add clinical information to samples
##  => Adding TCGA molecular information from marker papers
##  => Information will have prefix 'paper_'
## luad subtype information from:doi:10.1038/nature13385
## Available assays in SummarizedExperiment : 
##   => unstranded
##   => stranded_first
##   => stranded_second
##   => tpm_unstrand
##   => fpkm_unstrand
##   => fpkm_uq_unstrand
# Extract the raw count matrix from the SummarizedExperiment returned above.
# The assay is requested by name rather than relying on assay()'s default of
# "whichever assay is stored first": the object also carries stranded, TPM and
# FPKM assays, and downstream count-based analysis must not pick one of those
# up by accident. "unstranded" is the first assay listed, so the result is the
# same as before, but the call now errors if that assay is ever missing.
# NOTE(review): "unstranded" is presumed to be the raw integer read counts
# (the values printed below are integers) -- confirm against the GDC docs.
counts <- as.data.frame(assay(data, "unstranded"))

# Preview the first six genes (rows). All sample columns are printed, so the
# output is very wide.
head(counts)
##                    TCGA-73-4658-01A-01R-1755-07 TCGA-44-2661-11A-01R-1758-07
## ENSG00000000003.15                         3659                         1395
## ENSG00000000005.6                           188                            8
## ENSG00000000419.13                          981                         1031
## ENSG00000000457.14                          456                          541
## ENSG00000000460.17                          158                          135
## ENSG00000000938.13                         1645                         3245
##                    TCGA-55-6986-11A-01R-1949-07 TCGA-55-8615-01A-11R-2403-07
## ENSG00000000003.15                         6760                         2257
## ENSG00000000005.6                             3                            0
## ENSG00000000419.13                         2070                          644
## ENSG00000000457.14                         1110                          538
## ENSG00000000460.17                          202                          212
## ENSG00000000938.13                         4876                          616
##                    TCGA-97-8177-01A-11R-2287-07 TCGA-49-6744-11A-01R-1858-07
## ENSG00000000003.15                         5009                         1353
## ENSG00000000005.6                            13                            2
## ENSG00000000419.13                         2731                          842
## ENSG00000000457.14                          919                          417
## ENSG00000000460.17                          321                          106
## ENSG00000000938.13                         2299                         2699
##                    TCGA-67-3771-01A-01R-0946-07 TCGA-49-6744-01A-11R-1858-07
## ENSG00000000003.15                         1053                         2786
## ENSG00000000005.6                             1                           17
## ENSG00000000419.13                         1817                         1443
## ENSG00000000457.14                          598                          680
## ENSG00000000460.17                          339                          242
## ENSG00000000938.13                          902                         1458
##                    TCGA-MP-A4SW-01A-21R-A24X-07 TCGA-97-8176-01A-11R-2403-07
## ENSG00000000003.15                         3440                         3023
## ENSG00000000005.6                           333                            0
## ENSG00000000419.13                          862                         1067
## ENSG00000000457.14                          717                          579
## ENSG00000000460.17                          267                          370
## ENSG00000000938.13                         1567                          636
##                    TCGA-97-7552-01A-11R-2039-07 TCGA-55-A48Z-01A-12R-A24X-07
## ENSG00000000003.15                         2475                         6832
## ENSG00000000005.6                             3                            0
## ENSG00000000419.13                         1276                         1608
## ENSG00000000457.14                         1158                          978
## ENSG00000000460.17                          193                          409
## ENSG00000000938.13                         1520                          846
##                    TCGA-50-5944-01A-11R-1755-07 TCGA-MN-A4N5-01A-11R-A24X-07
## ENSG00000000003.15                         3266                         6122
## ENSG00000000005.6                             2                            0
## ENSG00000000419.13                         1015                         2714
## ENSG00000000457.14                          626                         1042
## ENSG00000000460.17                          147                          712
## ENSG00000000938.13                          554                          840
##                    TCGA-75-5146-01A-01R-1628-07 TCGA-97-7546-01A-11R-2039-07
## ENSG00000000003.15                         2783                         3447
## ENSG00000000005.6                             1                           15
## ENSG00000000419.13                         1131                         1736
## ENSG00000000457.14                          672                          870
## ENSG00000000460.17                          173                          393
## ENSG00000000938.13                          398                          808
##                    TCGA-55-7911-01A-11R-2170-07 TCGA-44-5643-01A-01R-1628-07
## ENSG00000000003.15                         1237                         2652
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                         1662                         2261
## ENSG00000000457.14                          671                          327
## ENSG00000000460.17                          435                          368
## ENSG00000000938.13                          611                          455
##                    TCGA-86-8279-01A-11R-2287-07 TCGA-75-6212-01A-11R-1755-07
## ENSG00000000003.15                         8462                         2655
## ENSG00000000005.6                             3                            2
## ENSG00000000419.13                         3576                          574
## ENSG00000000457.14                         1871                          362
## ENSG00000000460.17                          828                          142
## ENSG00000000938.13                          788                          463
##                    TCGA-55-8299-01A-11R-2287-07 TCGA-83-5908-01A-21R-2287-07
## ENSG00000000003.15                         2143                         3101
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                         1452                         2763
## ENSG00000000457.14                          737                         1255
## ENSG00000000460.17                          430                         1518
## ENSG00000000938.13                         1304                         1104
##                    TCGA-44-2655-01A-01R-0946-07 TCGA-50-6597-01A-11R-1858-07
## ENSG00000000003.15                         7709                         1599
## ENSG00000000005.6                             4                            0
## ENSG00000000419.13                         2754                          515
## ENSG00000000457.14                         2163                          295
## ENSG00000000460.17                          499                           21
## ENSG00000000938.13                         1212                          246
##                    TCGA-44-6776-11A-01R-1858-07 TCGA-44-6147-01B-06R-A277-07
## ENSG00000000003.15                         1200                         1002
## ENSG00000000005.6                             2                           10
## ENSG00000000419.13                          909                          284
## ENSG00000000457.14                          373                          549
## ENSG00000000460.17                           70                          497
## ENSG00000000938.13                         2123                          177
##                    TCGA-44-6776-01A-11R-1858-07 TCGA-55-8511-01A-11R-2403-07
## ENSG00000000003.15                         1384                         2376
## ENSG00000000005.6                             1                            0
## ENSG00000000419.13                          642                         1027
## ENSG00000000457.14                          602                          599
## ENSG00000000460.17                          119                          333
## ENSG00000000938.13                          184                          719
##                    TCGA-95-7948-01A-11R-2187-07 TCGA-97-8172-01A-11R-2287-07
## ENSG00000000003.15                         3087                         2219
## ENSG00000000005.6                             0                            5
## ENSG00000000419.13                         2389                         1622
## ENSG00000000457.14                         1171                         2548
## ENSG00000000460.17                          282                          427
## ENSG00000000938.13                          240                         1892
##                    TCGA-55-6979-01A-11R-1949-07 TCGA-55-6979-11A-01R-1949-07
## ENSG00000000003.15                         3103                          513
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                         1030                          585
## ENSG00000000457.14                          489                          304
## ENSG00000000460.17                          462                           77
## ENSG00000000938.13                         1242                         3081
##                    TCGA-44-6146-11A-01R-1858-07 TCGA-50-5939-11A-01R-1628-07
## ENSG00000000003.15                          548                          829
## ENSG00000000005.6                             1                            6
## ENSG00000000419.13                          722                          829
## ENSG00000000457.14                          403                          334
## ENSG00000000460.17                           73                           93
## ENSG00000000938.13                         1535                         2289
##                    TCGA-05-4410-01A-21R-1858-07 TCGA-64-1677-01A-01R-0946-07
## ENSG00000000003.15                         1583                         2474
## ENSG00000000005.6                             2                          188
## ENSG00000000419.13                          483                         1754
## ENSG00000000457.14                          597                          278
## ENSG00000000460.17                          183                          204
## ENSG00000000938.13                          598                          305
##                    TCGA-78-7633-01A-11R-2066-07 TCGA-55-7724-01A-11R-2170-07
## ENSG00000000003.15                         4673                         1676
## ENSG00000000005.6                             2                            1
## ENSG00000000419.13                         1715                          856
## ENSG00000000457.14                         1975                          361
## ENSG00000000460.17                          594                          135
## ENSG00000000938.13                          421                          638
##                    TCGA-05-4405-01A-21R-1858-07 TCGA-J2-8194-01A-11R-2241-07
## ENSG00000000003.15                         2438                         6038
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                          593                         1470
## ENSG00000000457.14                          701                         1446
## ENSG00000000460.17                          297                          244
## ENSG00000000938.13                          450                         1059
##                    TCGA-44-8119-01A-11R-2241-07 TCGA-62-A471-01A-12R-A24H-07
## ENSG00000000003.15                         6656                         4275
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                         3399                         1071
## ENSG00000000457.14                         1292                          630
## ENSG00000000460.17                          735                          542
## ENSG00000000938.13                         1325                          372
##                    TCGA-86-8674-01A-21R-2403-07 TCGA-55-A48Y-01A-11R-A24H-07
## ENSG00000000003.15                         2352                         4118
## ENSG00000000005.6                             1                            0
## ENSG00000000419.13                          911                         1117
## ENSG00000000457.14                          760                          632
## ENSG00000000460.17                          206                          389
## ENSG00000000938.13                          136                          472
##                    TCGA-91-6829-01A-21R-1858-07 TCGA-91-6829-11A-01R-1858-07
## ENSG00000000003.15                         2018                          780
## ENSG00000000005.6                             0                            2
## ENSG00000000419.13                         1252                          789
## ENSG00000000457.14                          556                          406
## ENSG00000000460.17                          316                          146
## ENSG00000000938.13                          420                        10095
##                    TCGA-69-7763-01A-11R-2170-07 TCGA-64-5779-01A-01R-1628-07
## ENSG00000000003.15                         1624                         1526
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                         1588                         1377
## ENSG00000000457.14                          498                          537
## ENSG00000000460.17                          114                          305
## ENSG00000000938.13                          743                          333
##                    TCGA-78-8640-01A-11R-2403-07 TCGA-64-1680-01A-02R-0946-07
## ENSG00000000003.15                          997                         4313
## ENSG00000000005.6                             0                            1
## ENSG00000000419.13                         1227                         1337
## ENSG00000000457.14                          852                          591
## ENSG00000000460.17                          527                          144
## ENSG00000000938.13                         1034                          166
##                    TCGA-38-4626-11A-01R-1758-07 TCGA-55-6980-01A-11R-1949-07
## ENSG00000000003.15                         2370                         4407
## ENSG00000000005.6                             1                            2
## ENSG00000000419.13                         2770                          513
## ENSG00000000457.14                          621                          336
## ENSG00000000460.17                          166                           97
## ENSG00000000938.13                        17257                          576
##                    TCGA-55-6980-11A-01R-1949-07 TCGA-91-6835-11A-01R-1858-07
## ENSG00000000003.15                          618                         2510
## ENSG00000000005.6                             0                            4
## ENSG00000000419.13                          570                          881
## ENSG00000000457.14                          346                          506
## ENSG00000000460.17                           78                           96
## ENSG00000000938.13                         1476                         1062
##                    TCGA-86-8585-01A-11R-2403-07 TCGA-75-6211-01A-11R-1755-07
## ENSG00000000003.15                         4460                         3513
## ENSG00000000005.6                             0                            1
## ENSG00000000419.13                         1109                         1436
## ENSG00000000457.14                          411                          492
## ENSG00000000460.17                          283                          363
## ENSG00000000938.13                         2004                          162
##                    TCGA-50-5066-01A-01R-1628-07 TCGA-78-7149-01A-11R-2039-07
## ENSG00000000003.15                         3580                         2351
## ENSG00000000005.6                             1                            3
## ENSG00000000419.13                         2836                          953
## ENSG00000000457.14                          533                          699
## ENSG00000000460.17                          433                          152
## ENSG00000000938.13                          533                          270
##                    TCGA-97-A4M2-01A-12R-A24X-07 TCGA-44-6777-01A-11R-1858-07
## ENSG00000000003.15                         1459                         1728
## ENSG00000000005.6                             0                            3
## ENSG00000000419.13                         1031                          781
## ENSG00000000457.14                          861                          490
## ENSG00000000460.17                          187                          218
## ENSG00000000938.13                         6849                         2347
##                    TCGA-67-3774-01A-01R-0946-07 TCGA-05-4426-01A-01R-1206-07
## ENSG00000000003.15                         2698                         9468
## ENSG00000000005.6                             0                            1
## ENSG00000000419.13                          738                         2420
## ENSG00000000457.14                          611                         1100
## ENSG00000000460.17                          203                          417
## ENSG00000000938.13                          862                         1515
##                    TCGA-55-6982-11A-01R-1949-07 TCGA-55-A4DF-01A-11R-A24H-07
## ENSG00000000003.15                          950                         3943
## ENSG00000000005.6                             1                            0
## ENSG00000000419.13                          706                         3162
## ENSG00000000457.14                          475                          726
## ENSG00000000460.17                           87                          603
## ENSG00000000938.13                         2201                         1578
##                    TCGA-97-7553-01A-21R-2039-07 TCGA-MP-A4TD-01A-32R-A262-07
## ENSG00000000003.15                         5176                         3071
## ENSG00000000005.6                             8                            0
## ENSG00000000419.13                         1899                         1155
## ENSG00000000457.14                          856                          660
## ENSG00000000460.17                          441                          362
## ENSG00000000938.13                         4358                          664
##                    TCGA-50-5930-01A-11R-1755-07 TCGA-95-A4VN-01A-11R-A262-07
## ENSG00000000003.15                         1155                         1959
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                          460                         1104
## ENSG00000000457.14                          156                          837
## ENSG00000000460.17                           90                          524
## ENSG00000000938.13                          437                         1645
##                    TCGA-44-3396-11A-01R-1758-07 TCGA-93-A4JO-01A-21R-A24X-07
## ENSG00000000003.15                         2615                         3541
## ENSG00000000005.6                             1                            0
## ENSG00000000419.13                         1664                         1982
## ENSG00000000457.14                          858                          854
## ENSG00000000460.17                          172                          431
## ENSG00000000938.13                         9948                         1763
##                    TCGA-49-AAR0-01A-21R-A39D-07 TCGA-67-4679-01B-01R-1755-07
## ENSG00000000003.15                         1443                         1655
## ENSG00000000005.6                             1                            2
## ENSG00000000419.13                         1716                          895
## ENSG00000000457.14                          669                         1100
## ENSG00000000460.17                          302                          199
## ENSG00000000938.13                         2315                          814
##                    TCGA-O1-A52J-01A-11R-A262-07 TCGA-05-4420-01A-01R-1206-07
## ENSG00000000003.15                         2730                         8815
## ENSG00000000005.6                             0                            2
## ENSG00000000419.13                         1583                         3824
## ENSG00000000457.14                          804                         1011
## ENSG00000000460.17                          356                          751
## ENSG00000000938.13                         2852                          920
##                    TCGA-55-6642-01A-11R-1858-07 TCGA-86-7953-01A-11R-2187-07
## ENSG00000000003.15                         1672                         4519
## ENSG00000000005.6                             1                            0
## ENSG00000000419.13                          729                         1322
## ENSG00000000457.14                          476                          911
## ENSG00000000460.17                          206                         1180
## ENSG00000000938.13                          278                         1796
##                    TCGA-55-6983-11A-01R-1949-07 TCGA-53-7624-01A-11R-2066-07
## ENSG00000000003.15                          897                         2020
## ENSG00000000005.6                             1                            1
## ENSG00000000419.13                          735                         1419
## ENSG00000000457.14                          321                          891
## ENSG00000000460.17                           55                          813
## ENSG00000000938.13                         1984                          322
##                    TCGA-97-A4M3-01A-11R-A24X-07 TCGA-80-5608-01A-31R-1949-07
## ENSG00000000003.15                         1430                         5351
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                         1196                         1269
## ENSG00000000457.14                          617                          909
## ENSG00000000460.17                          190                          456
## ENSG00000000938.13                          341                          986
##                    TCGA-55-8620-01A-11R-2403-07 TCGA-62-8395-01A-11R-2326-07
## ENSG00000000003.15                         1097                         1548
## ENSG00000000005.6                             1                            0
## ENSG00000000419.13                         1486                         1239
## ENSG00000000457.14                          200                          681
## ENSG00000000460.17                          215                          184
## ENSG00000000938.13                          633                          522
##                    TCGA-75-6207-01A-11R-1755-07 TCGA-50-5068-01A-01R-1628-07
## ENSG00000000003.15                         2473                         2897
## ENSG00000000005.6                             8                            2
## ENSG00000000419.13                         1801                         3171
## ENSG00000000457.14                          839                          771
## ENSG00000000460.17                          394                          249
## ENSG00000000938.13                          666                          532
##                    TCGA-NJ-A4YF-01A-12R-A262-07 TCGA-44-6145-01A-11R-1755-07
## ENSG00000000003.15                         1267                         1398
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                         3955                         1287
## ENSG00000000457.14                          362                          656
## ENSG00000000460.17                          210                          303
## ENSG00000000938.13                          492                          739
##                    TCGA-05-4424-01A-22R-1858-07 TCGA-86-A4P8-01A-11R-A24X-07
## ENSG00000000003.15                         3373                         1157
## ENSG00000000005.6                             0                            2
## ENSG00000000419.13                         1045                         1235
## ENSG00000000457.14                          662                          805
## ENSG00000000460.17                          386                          208
## ENSG00000000938.13                         1067                         4548
##                    TCGA-67-3772-01A-01R-0946-07 TCGA-44-4112-01B-06R-A277-07
## ENSG00000000003.15                         9732                          591
## ENSG00000000005.6                            20                           20
## ENSG00000000419.13                         2166                          383
## ENSG00000000457.14                         1000                          871
## ENSG00000000460.17                          345                          462
## ENSG00000000938.13                          855                          204
##                    TCGA-44-4112-01A-01R-A278-07 TCGA-55-7994-01A-11R-2187-07
## ENSG00000000003.15                         1309                         3841
## ENSG00000000005.6                             7                           18
## ENSG00000000419.13                         1123                         1625
## ENSG00000000457.14                         1064                         1118
## ENSG00000000460.17                          334                          762
## ENSG00000000938.13                          211                         2424
##                    TCGA-49-4490-01A-21R-1858-07 TCGA-35-4123-01A-01R-1107-07
## ENSG00000000003.15                         3760                         7006
## ENSG00000000005.6                             3                            0
## ENSG00000000419.13                          632                         2860
## ENSG00000000457.14                          351                          983
## ENSG00000000460.17                          106                          883
## ENSG00000000938.13                          239                          968
##                    TCGA-05-4425-01A-01R-1755-07 TCGA-49-4490-11A-01R-1858-07
## ENSG00000000003.15                         1539                         1524
## ENSG00000000005.6                             0                            2
## ENSG00000000419.13                         1282                         1677
## ENSG00000000457.14                          445                          414
## ENSG00000000460.17                          198                           99
## ENSG00000000938.13                          852                         1392
##                    TCGA-55-8204-01A-11R-2241-07 TCGA-MP-A4T9-01A-11R-A24X-07
## ENSG00000000003.15                         7254                         7321
## ENSG00000000005.6                             2                            4
## ENSG00000000419.13                         2428                         1475
## ENSG00000000457.14                          716                          629
## ENSG00000000460.17                          521                          303
## ENSG00000000938.13                          735                         1770
##                    TCGA-78-7539-01A-11R-2066-07 TCGA-50-5933-11A-01R-1755-07
## ENSG00000000003.15                         4165                         1110
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                         1534                         1192
## ENSG00000000457.14                         1162                          275
## ENSG00000000460.17                          446                           74
## ENSG00000000938.13                         2130                         3373
##                    TCGA-50-5933-01A-11R-1755-07 TCGA-MP-A4T8-01A-11R-A24X-07
## ENSG00000000003.15                         3500                         4768
## ENSG00000000005.6                            17                            1
## ENSG00000000419.13                         1525                         1494
## ENSG00000000457.14                          467                          719
## ENSG00000000460.17                          420                          426
## ENSG00000000938.13                         1345                          298
##                    TCGA-55-6970-11A-01R-1949-07 TCGA-55-8302-01A-11R-2326-07
## ENSG00000000003.15                          783                         3018
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                          783                         1159
## ENSG00000000457.14                          380                          444
## ENSG00000000460.17                           72                          323
## ENSG00000000938.13                         2418                         1040
##                    TCGA-44-A47A-01A-21R-A24H-07 TCGA-MP-A4TF-01A-11R-A262-07
## ENSG00000000003.15                         1686                         2421
## ENSG00000000005.6                             0                            1
## ENSG00000000419.13                         1064                         1617
## ENSG00000000457.14                         1167                          814
## ENSG00000000460.17                          329                          415
## ENSG00000000938.13                         1603                          380
##                    TCGA-50-7109-01A-11R-2039-07 TCGA-78-7161-01A-11R-2039-07
## ENSG00000000003.15                         2624                         7079
## ENSG00000000005.6                             0                            1
## ENSG00000000419.13                         1964                         1899
## ENSG00000000457.14                          431                          959
## ENSG00000000460.17                          198                          252
## ENSG00000000938.13                          634                          293
##                    TCGA-55-6971-01A-11R-1949-07 TCGA-86-A456-01A-11R-A24H-07
## ENSG00000000003.15                         1369                         1945
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                          703                         1268
## ENSG00000000457.14                          479                          441
## ENSG00000000460.17                          140                          119
## ENSG00000000938.13                         1004                         1445
##                    TCGA-44-6148-11A-01R-1858-07 TCGA-44-6148-01A-11R-1755-07
## ENSG00000000003.15                         3135                         4774
## ENSG00000000005.6                             4                            9
## ENSG00000000419.13                         1152                          974
## ENSG00000000457.14                          725                          838
## ENSG00000000460.17                          166                          149
## ENSG00000000938.13                         3046                          750
##                    TCGA-MP-A5C7-01A-11R-A262-07 TCGA-49-AAQV-01A-11R-A39D-07
## ENSG00000000003.15                         3424                         8559
## ENSG00000000005.6                             1                            2
## ENSG00000000419.13                         1700                         2325
## ENSG00000000457.14                         2170                         1365
## ENSG00000000460.17                          457                          671
## ENSG00000000938.13                          263                          775
##                    TCGA-75-6205-01A-11R-1755-07 TCGA-44-A47B-01A-11R-A24H-07
## ENSG00000000003.15                          626                         2370
## ENSG00000000005.6                             0                            8
## ENSG00000000419.13                          576                         1290
## ENSG00000000457.14                          264                          713
## ENSG00000000460.17                          163                          496
## ENSG00000000938.13                          958                          847
##                    TCGA-50-5049-01A-01R-1628-07 TCGA-55-5899-01A-11R-1628-07
## ENSG00000000003.15                         1638                         1146
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                         1563                         1397
## ENSG00000000457.14                          655                          335
## ENSG00000000460.17                          369                          343
## ENSG00000000938.13                         1734                          509
##                    TCGA-50-5045-01A-01R-1628-07 TCGA-69-8453-01A-12R-2326-07
## ENSG00000000003.15                         6341                         1671
## ENSG00000000005.6                            31                            1
## ENSG00000000419.13                         2299                         1100
## ENSG00000000457.14                          637                          649
## ENSG00000000460.17                          289                          226
## ENSG00000000938.13                         2541                         5589
##                    TCGA-55-A491-01A-11R-A24H-07 TCGA-49-6743-11A-01R-1858-07
## ENSG00000000003.15                         5849                         1281
## ENSG00000000005.6                             2                          107
## ENSG00000000419.13                         1227                         1042
## ENSG00000000457.14                          757                          334
## ENSG00000000460.17                          520                           94
## ENSG00000000938.13                          936                         1647
##                    TCGA-49-6743-01A-11R-1858-07 TCGA-78-7150-01A-21R-2039-07
## ENSG00000000003.15                         1085                         3440
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                         1382                         1478
## ENSG00000000457.14                          610                          625
## ENSG00000000460.17                          599                          749
## ENSG00000000938.13                          543                          227
##                    TCGA-95-A4VP-01A-21R-A262-07 TCGA-49-4512-01A-21R-1858-07
## ENSG00000000003.15                         3648                         1516
## ENSG00000000005.6                             0                            1
## ENSG00000000419.13                          955                          837
## ENSG00000000457.14                          526                          293
## ENSG00000000460.17                          293                           87
## ENSG00000000938.13                         1147                          695
##                    TCGA-86-7701-01A-11R-2170-07 TCGA-55-8513-01A-11R-2403-07
## ENSG00000000003.15                         1086                         1428
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                          687                          624
## ENSG00000000457.14                          304                          477
## ENSG00000000460.17                          180                          108
## ENSG00000000938.13                          270                         3715
##                    TCGA-55-8206-01A-11R-2241-07 TCGA-44-7660-01A-11R-2066-07
## ENSG00000000003.15                         5487                         6616
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                         1804                         2332
## ENSG00000000457.14                         1798                          998
## ENSG00000000460.17                          337                          614
## ENSG00000000938.13                         3921                          734
##                    TCGA-95-7944-01A-11R-2187-07 TCGA-L9-A743-01A-43R-A39D-07
## ENSG00000000003.15                         1566                         2559
## ENSG00000000005.6                             0                            1
## ENSG00000000419.13                         2666                         1415
## ENSG00000000457.14                          701                          561
## ENSG00000000460.17                          642                          336
## ENSG00000000938.13                         1327                         1778
##                    TCGA-55-7995-01A-11R-2187-07 TCGA-97-7554-01A-11R-2039-07
## ENSG00000000003.15                         4577                         2777
## ENSG00000000005.6                             0                           11
## ENSG00000000419.13                         2329                         1205
## ENSG00000000457.14                         1257                          611
## ENSG00000000460.17                          823                          376
## ENSG00000000938.13                         3079                          763
##                    TCGA-50-6673-01A-11R-1949-07 TCGA-49-4514-01A-21R-1858-07
## ENSG00000000003.15                         2904                         2261
## ENSG00000000005.6                             2                            0
## ENSG00000000419.13                         1071                         1792
## ENSG00000000457.14                          437                          246
## ENSG00000000460.17                          260                          131
## ENSG00000000938.13                          713                          275
##                    TCGA-NJ-A55A-01A-11R-A262-07 TCGA-78-7160-01A-11R-2039-07
## ENSG00000000003.15                         3009                         1946
## ENSG00000000005.6                             0                            1
## ENSG00000000419.13                          753                          734
## ENSG00000000457.14                          499                          724
## ENSG00000000460.17                          137                          450
## ENSG00000000938.13                          380                          776
##                    TCGA-55-7727-01A-11R-2170-07 TCGA-44-2668-11A-01R-1758-07
## ENSG00000000003.15                         1804                          985
## ENSG00000000005.6                             0                            2
## ENSG00000000419.13                         1118                         1278
## ENSG00000000457.14                          554                          413
## ENSG00000000460.17                          196                          135
## ENSG00000000938.13                          243                        12378
##                    TCGA-62-A46R-01A-11R-A24H-07 TCGA-55-7907-01A-11R-2170-07
## ENSG00000000003.15                         2723                         1877
## ENSG00000000005.6                             0                            4
## ENSG00000000419.13                         1637                          920
## ENSG00000000457.14                          805                          417
## ENSG00000000460.17                          358                          279
## ENSG00000000938.13                         1153                          448
##                    TCGA-44-2662-01B-02R-A277-07 TCGA-44-6779-01A-11R-1858-07
## ENSG00000000003.15                          667                         1961
## ENSG00000000005.6                            10                            0
## ENSG00000000419.13                          554                          385
## ENSG00000000457.14                          716                          355
## ENSG00000000460.17                          619                          261
## ENSG00000000938.13                          455                          315
##                    TCGA-50-5055-01A-01R-1628-07 TCGA-67-6216-01A-11R-1755-07
## ENSG00000000003.15                         2078                         5010
## ENSG00000000005.6                             5                          917
## ENSG00000000419.13                         1004                          640
## ENSG00000000457.14                          368                          522
## ENSG00000000460.17                          117                          275
## ENSG00000000938.13                          763                          649
##                    TCGA-05-4415-01A-22R-1858-07 TCGA-86-7713-01A-11R-2066-07
## ENSG00000000003.15                         2626                         8522
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                          865                         2548
## ENSG00000000457.14                          585                         2384
## ENSG00000000460.17                          730                         1790
## ENSG00000000938.13                          266                          491
##                    TCGA-86-8073-01A-11R-2241-07 TCGA-55-6969-01A-11R-1949-07
## ENSG00000000003.15                         3418                         2233
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                         2937                         2803
## ENSG00000000457.14                         1008                          940
## ENSG00000000460.17                          592                          647
## ENSG00000000938.13                         1926                         1136
##                    TCGA-38-4627-01A-01R-1206-07 TCGA-MN-A4N4-01A-12R-A24X-07
## ENSG00000000003.15                        10673                         3365
## ENSG00000000005.6                             3                            0
## ENSG00000000419.13                         2002                         2244
## ENSG00000000457.14                          605                          777
## ENSG00000000460.17                          335                          451
## ENSG00000000938.13                         1185                          507
##                    TCGA-62-A46O-01A-11R-A24H-07 TCGA-86-8280-01A-11R-2287-07
## ENSG00000000003.15                         5948                         7090
## ENSG00000000005.6                             0                            7
## ENSG00000000419.13                         1174                         1599
## ENSG00000000457.14                          628                         1377
## ENSG00000000460.17                          572                          441
## ENSG00000000938.13                          113                         1778
##                    TCGA-67-6215-01A-11R-1755-07 TCGA-44-A47G-01A-21R-A24H-07
## ENSG00000000003.15                         1975                         2272
## ENSG00000000005.6                             4                            0
## ENSG00000000419.13                         1197                          851
## ENSG00000000457.14                         1705                          565
## ENSG00000000460.17                          560                          199
## ENSG00000000938.13                         1658                         2245
##                    TCGA-78-7536-01A-11R-2066-07 TCGA-44-5645-11A-01R-1628-07
## ENSG00000000003.15                         4409                         1294
## ENSG00000000005.6                             0                            3
## ENSG00000000419.13                         3799                         1108
## ENSG00000000457.14                         1998                          673
## ENSG00000000460.17                         1683                          114
## ENSG00000000938.13                         1088                         3578
##                    TCGA-91-6831-01A-11R-1858-07 TCGA-97-A4M0-01A-11R-A24X-07
## ENSG00000000003.15                         1419                          943
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                         1693                         1236
## ENSG00000000457.14                          541                         1215
## ENSG00000000460.17                          508                          390
## ENSG00000000938.13                          557                         1161
##                    TCGA-44-5645-01B-04R-A277-07 TCGA-55-A493-01A-11R-A24H-07
## ENSG00000000003.15                         1807                         1841
## ENSG00000000005.6                            41                           19
## ENSG00000000419.13                          620                         1308
## ENSG00000000457.14                         1368                          445
## ENSG00000000460.17                          695                          414
## ENSG00000000938.13                          227                         1800
##                    TCGA-55-6985-01A-11R-1949-07 TCGA-55-6543-01A-11R-1755-07
## ENSG00000000003.15                         3932                         4662
## ENSG00000000005.6                             0                            1
## ENSG00000000419.13                          980                         1414
## ENSG00000000457.14                          407                          543
## ENSG00000000460.17                          207                          161
## ENSG00000000938.13                          554                         1236
##                    TCGA-50-5936-01A-11R-1628-07 TCGA-05-4432-01A-01R-1206-07
## ENSG00000000003.15                         2532                         3883
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                          616                         2952
## ENSG00000000457.14                          487                         1682
## ENSG00000000460.17                          310                         1267
## ENSG00000000938.13                          517                         1829
##                    TCGA-93-7348-01A-21R-2039-07 TCGA-55-6984-11A-01R-1949-07
## ENSG00000000003.15                         2336                         2056
## ENSG00000000005.6                             1                            5
## ENSG00000000419.13                         1095                         1573
## ENSG00000000457.14                          715                          774
## ENSG00000000460.17                          286                          154
## ENSG00000000938.13                          412                         3092
##                    TCGA-55-6984-01A-11R-1949-07 TCGA-78-7147-01A-11R-2039-07
## ENSG00000000003.15                          843                         8038
## ENSG00000000005.6                             0                            6
## ENSG00000000419.13                          739                         1763
## ENSG00000000457.14                          653                         1005
## ENSG00000000460.17                          148                          552
## ENSG00000000938.13                          312                          510
##                    TCGA-50-5941-01A-11R-1755-07 TCGA-4B-A93V-01A-11R-A39D-07
## ENSG00000000003.15                         3341                         5108
## ENSG00000000005.6                             1                            1
## ENSG00000000419.13                         1340                         1570
## ENSG00000000457.14                          603                          641
## ENSG00000000460.17                          231                          525
## ENSG00000000938.13                         1189                          577
##                    TCGA-91-A4BC-01A-11R-A24H-07 TCGA-78-7167-01A-11R-2066-07
## ENSG00000000003.15                         1501                         3739
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                         1483                         2047
## ENSG00000000457.14                          363                         2240
## ENSG00000000460.17                          286                          525
## ENSG00000000938.13                          560                          355
##                    TCGA-95-7043-01A-11R-1949-07 TCGA-67-6217-01A-11R-1755-07
## ENSG00000000003.15                         1475                         2119
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                         1275                         1026
## ENSG00000000457.14                          373                          752
## ENSG00000000460.17                          172                          189
## ENSG00000000938.13                           81                          539
##                    TCGA-55-7726-01A-11R-2170-07 TCGA-49-6745-11A-01R-1858-07
## ENSG00000000003.15                         1188                         2183
## ENSG00000000005.6                             0                            3
## ENSG00000000419.13                         1564                         1573
## ENSG00000000457.14                          269                          480
## ENSG00000000460.17                          219                           87
## ENSG00000000938.13                          321                         1621
##                    TCGA-49-6745-01A-11R-1858-07 TCGA-50-5932-11A-01R-1755-07
## ENSG00000000003.15                         2687                          622
## ENSG00000000005.6                             3                            1
## ENSG00000000419.13                         1157                          619
## ENSG00000000457.14                          443                          411
## ENSG00000000460.17                          345                           79
## ENSG00000000938.13                         1059                         1896
##                    TCGA-55-8619-01A-11R-2403-07 TCGA-78-7166-01A-12R-2066-07
## ENSG00000000003.15                         1020                         2966
## ENSG00000000005.6                             1                            0
## ENSG00000000419.13                          831                         1346
## ENSG00000000457.14                          518                         1050
## ENSG00000000460.17                          132                          376
## ENSG00000000938.13                         2581                          434
##                    TCGA-55-1596-01A-01R-0946-07 TCGA-73-4670-01A-01R-1206-07
## ENSG00000000003.15                         6067                         4513
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                         2126                         4348
## ENSG00000000457.14                          677                         1080
## ENSG00000000460.17                          651                         1161
## ENSG00000000938.13                          362                          904
##                    TCGA-05-5420-01A-01R-1628-07 TCGA-95-7562-01A-11R-2241-07
## ENSG00000000003.15                         3641                         9980
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                         1804                         2705
## ENSG00000000457.14                          687                         1459
## ENSG00000000460.17                          338                         1699
## ENSG00000000938.13                         1533                          884
##                    TCGA-95-7947-01A-11R-2187-07 TCGA-55-8090-01A-11R-2241-07
## ENSG00000000003.15                         5315                         3866
## ENSG00000000005.6                             9                            0
## ENSG00000000419.13                         3541                         1516
## ENSG00000000457.14                         1756                          779
## ENSG00000000460.17                          918                          218
## ENSG00000000938.13                          969                         1468
##                    TCGA-73-7498-01A-12R-2187-07 TCGA-44-6146-01A-11R-A278-07
## ENSG00000000003.15                         5042                          791
## ENSG00000000005.6                             4                            0
## ENSG00000000419.13                         1966                          881
## ENSG00000000457.14                         1920                          639
## ENSG00000000460.17                          410                           98
## ENSG00000000938.13                         1404                           77
##                    TCGA-44-6146-01A-11R-1755-07 TCGA-91-7771-01A-11R-2170-07
## ENSG00000000003.15                         1430                         3663
## ENSG00000000005.6                             0                            3
## ENSG00000000419.13                         1562                         2084
## ENSG00000000457.14                          998                          640
## ENSG00000000460.17                          157                          228
## ENSG00000000938.13                          261                         1200
##                    TCGA-44-7670-01A-11R-2066-07 TCGA-49-4501-01A-01R-1206-07
## ENSG00000000003.15                         6072                         8645
## ENSG00000000005.6                             0                            8
## ENSG00000000419.13                         4411                         3076
## ENSG00000000457.14                          593                         1623
## ENSG00000000460.17                          748                          517
## ENSG00000000938.13                          488                         1273
##                    TCGA-05-4427-01A-21R-1858-07 TCGA-MP-A4T4-01A-11R-A262-07
## ENSG00000000003.15                         1413                         2165
## ENSG00000000005.6                            86                            0
## ENSG00000000419.13                         1452                         1587
## ENSG00000000457.14                          375                          747
## ENSG00000000460.17                          387                          370
## ENSG00000000938.13                          886                         1634
##                    TCGA-44-7671-01A-11R-2066-07 TCGA-78-8660-01A-11R-2403-07
## ENSG00000000003.15                         3794                         1486
## ENSG00000000005.6                             7                            2
## ENSG00000000419.13                         1695                         1226
## ENSG00000000457.14                         1007                          465
## ENSG00000000460.17                          295                          441
## ENSG00000000938.13                          408                         1223
##                    TCGA-55-6978-11A-01R-1949-07 TCGA-05-4244-01A-01R-1107-07
## ENSG00000000003.15                         1500                         5001
## ENSG00000000005.6                             3                            0
## ENSG00000000419.13                          869                         1452
## ENSG00000000457.14                          367                         1308
## ENSG00000000460.17                           89                          789
## ENSG00000000938.13                         2130                         1963
##                    TCGA-J2-8192-01A-11R-2241-07 TCGA-L4-A4E5-01A-11R-A24X-07
## ENSG00000000003.15                         9590                         1501
## ENSG00000000005.6                             3                            0
## ENSG00000000419.13                         2299                         1769
## ENSG00000000457.14                         1423                          790
## ENSG00000000460.17                          419                          330
## ENSG00000000938.13                         2024                          348
##                    TCGA-86-8055-01A-11R-2241-07 TCGA-55-A48X-01A-11R-A24H-07
## ENSG00000000003.15                         8641                         3709
## ENSG00000000005.6                             3                            0
## ENSG00000000419.13                         2135                         1283
## ENSG00000000457.14                         1175                         1022
## ENSG00000000460.17                          549                          452
## ENSG00000000938.13                         1499                          830
##                    TCGA-71-8520-01A-11R-2403-07 TCGA-55-8096-01A-11R-2241-07
## ENSG00000000003.15                         2582                         8028
## ENSG00000000005.6                             0                            3
## ENSG00000000419.13                          878                         2142
## ENSG00000000457.14                          571                         1352
## ENSG00000000460.17                          424                          622
## ENSG00000000938.13                          324                         2641
##                    TCGA-50-5930-11A-01R-1755-07 TCGA-97-8171-01A-11R-2287-07
## ENSG00000000003.15                          459                         3729
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                          725                         1589
## ENSG00000000457.14                          372                         2702
## ENSG00000000460.17                           74                          631
## ENSG00000000938.13                         5527                          468
##                    TCGA-55-A492-01A-11R-A24H-07 TCGA-95-7567-01A-11R-2066-07
## ENSG00000000003.15                         3035                         2546
## ENSG00000000005.6                             1                          108
## ENSG00000000419.13                         1344                         3384
## ENSG00000000457.14                         1052                         1409
## ENSG00000000460.17                          157                          908
## ENSG00000000938.13                          382                          807
##                    TCGA-44-3396-01A-01R-1206-07 TCGA-69-7974-01A-11R-2187-07
## ENSG00000000003.15                         9058                         3169
## ENSG00000000005.6                             3                           47
## ENSG00000000419.13                         4525                         2278
## ENSG00000000457.14                          977                          943
## ENSG00000000460.17                          870                          585
## ENSG00000000938.13                         4621                         2660
##                    TCGA-44-6775-01C-02R-A277-07 TCGA-44-6775-01A-11R-1858-07
## ENSG00000000003.15                         1218                         3446
## ENSG00000000005.6                            10                            2
## ENSG00000000419.13                          558                         1469
## ENSG00000000457.14                          830                          706
## ENSG00000000460.17                          463                          415
## ENSG00000000938.13                          305                         1332
##                    TCGA-44-6775-01A-11R-A278-07 TCGA-44-2656-01A-02R-A278-07
## ENSG00000000003.15                         2487                         4199
## ENSG00000000005.6                             2                            5
## ENSG00000000419.13                         1141                         2074
## ENSG00000000457.14                          845                         1952
## ENSG00000000460.17                          413                          666
## ENSG00000000938.13                          742                          878
##                    TCGA-NJ-A4YP-01A-11R-A262-07 TCGA-44-2656-01B-06R-A277-07
## ENSG00000000003.15                         4154                         1196
## ENSG00000000005.6                            20                            4
## ENSG00000000419.13                         3191                          359
## ENSG00000000457.14                         2074                          621
## ENSG00000000460.17                         1090                          373
## ENSG00000000938.13                         1082                          328
##                    TCGA-53-7813-01A-11R-2170-07 TCGA-35-4122-01A-01R-1107-07
## ENSG00000000003.15                         1952                        10798
## ENSG00000000005.6                             0                           36
## ENSG00000000419.13                          956                         3551
## ENSG00000000457.14                          573                          542
## ENSG00000000460.17                          195                         1115
## ENSG00000000938.13                          175                         1698
##                    TCGA-86-8056-01A-11R-2241-07 TCGA-78-7159-01A-11R-2039-07
## ENSG00000000003.15                         2472                         4288
## ENSG00000000005.6                             2                            1
## ENSG00000000419.13                         2471                         1335
## ENSG00000000457.14                          755                          637
## ENSG00000000460.17                          209                          539
## ENSG00000000938.13                         1891                          565
##                    TCGA-91-8497-01A-11R-2403-07 TCGA-86-8074-01A-11R-2241-07
## ENSG00000000003.15                          769                         8673
## ENSG00000000005.6                             7                            1
## ENSG00000000419.13                          725                         3153
## ENSG00000000457.14                          536                         1713
## ENSG00000000460.17                          109                         1067
## ENSG00000000938.13                         1204                         1226
##                    TCGA-05-4249-01A-01R-1107-07 TCGA-78-7145-01A-11R-2039-07
## ENSG00000000003.15                         4383                         3519
## ENSG00000000005.6                             0                            3
## ENSG00000000419.13                         2006                         1422
## ENSG00000000457.14                         1632                          490
## ENSG00000000460.17                          482                          459
## ENSG00000000938.13                         1209                          698
##                    TCGA-78-7158-01A-11R-2039-07 TCGA-71-6725-01A-11R-1858-07
## ENSG00000000003.15                         2778                         4597
## ENSG00000000005.6                             3                            0
## ENSG00000000419.13                         1145                         2710
## ENSG00000000457.14                         4417                         1064
## ENSG00000000460.17                          421                          326
## ENSG00000000938.13                          390                         1230
##                    TCGA-86-7714-01A-12R-2170-07 TCGA-55-6983-01A-11R-1949-07
## ENSG00000000003.15                         2223                         4826
## ENSG00000000005.6                             2                            0
## ENSG00000000419.13                          711                         1471
## ENSG00000000457.14                          397                         1135
## ENSG00000000460.17                          155                          334
## ENSG00000000938.13                          329                         1036
##                    TCGA-78-7163-01A-12R-2066-07 TCGA-62-A46Y-01A-11R-A24H-07
## ENSG00000000003.15                         9492                         5460
## ENSG00000000005.6                             1                            1
## ENSG00000000419.13                         2524                         1682
## ENSG00000000457.14                          741                          855
## ENSG00000000460.17                          225                          253
## ENSG00000000938.13                          110                         1256
##                    TCGA-35-5375-01A-01R-1628-07 TCGA-97-A4M5-01A-11R-A24X-07
## ENSG00000000003.15                         3715                         4006
## ENSG00000000005.6                             0                            3
## ENSG00000000419.13                         2468                         1433
## ENSG00000000457.14                          249                          820
## ENSG00000000460.17                          395                          258
## ENSG00000000938.13                          150                         2105
##                    TCGA-97-7547-01A-11R-2039-07 TCGA-44-6778-01A-11R-1858-07
## ENSG00000000003.15                         7284                          886
## ENSG00000000005.6                             2                            0
## ENSG00000000419.13                         1092                          988
## ENSG00000000457.14                         1218                          601
## ENSG00000000460.17                          176                          299
## ENSG00000000938.13                          534                         1954
##                    TCGA-86-8281-01A-11R-2287-07 TCGA-49-4486-01A-01R-1206-07
## ENSG00000000003.15                         8297                         4586
## ENSG00000000005.6                             1                            0
## ENSG00000000419.13                         1960                         2629
## ENSG00000000457.14                         2100                         1512
## ENSG00000000460.17                          543                          311
## ENSG00000000938.13                          519                          266
##                    TCGA-62-8399-01A-21R-2326-07 TCGA-44-8117-01A-11R-2241-07
## ENSG00000000003.15                         3014                         2255
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                         2149                         3896
## ENSG00000000457.14                          843                         2317
## ENSG00000000460.17                          663                         1258
## ENSG00000000938.13                         1084                          756
##                    TCGA-44-4112-01A-01R-1107-07 TCGA-05-4397-01A-01R-1206-07
## ENSG00000000003.15                         3014                         7805
## ENSG00000000005.6                             7                           19
## ENSG00000000419.13                         1924                         9358
## ENSG00000000457.14                         1208                         1789
## ENSG00000000460.17                          506                         2743
## ENSG00000000938.13                          596                         1775
##                    TCGA-55-8094-01A-11R-2241-07 TCGA-44-3918-01B-02R-A277-07
## ENSG00000000003.15                         3719                          848
## ENSG00000000005.6                             0                           23
## ENSG00000000419.13                         2174                          552
## ENSG00000000457.14                          677                          900
## ENSG00000000460.17                          386                          562
## ENSG00000000938.13                          117                          286
##                    TCGA-49-6767-01A-11R-1858-07 TCGA-44-3918-01A-01R-1107-07
## ENSG00000000003.15                         9713                         5980
## ENSG00000000005.6                             0                            1
## ENSG00000000419.13                         1640                         3549
## ENSG00000000457.14                          382                         1297
## ENSG00000000460.17                          475                          673
## ENSG00000000938.13                          648                         1529
##                    TCGA-44-3918-01A-01R-A278-07 TCGA-93-A4JQ-01A-11R-A24X-07
## ENSG00000000003.15                         4233                         1305
## ENSG00000000005.6                            10                           12
## ENSG00000000419.13                         3306                         1352
## ENSG00000000457.14                         1631                          915
## ENSG00000000460.17                          842                          621
## ENSG00000000938.13                          403                         1419
##                    TCGA-62-8394-01A-11R-2326-07 TCGA-62-A46U-01A-11R-A24H-07
## ENSG00000000003.15                         6428                         1168
## ENSG00000000005.6                             4                            0
## ENSG00000000419.13                         1522                         1460
## ENSG00000000457.14                          594                          681
## ENSG00000000460.17                          767                          363
## ENSG00000000938.13                          533                         6236
##                    TCGA-69-7761-01A-11R-2170-07 TCGA-MP-A4TH-01A-31R-A262-07
## ENSG00000000003.15                         1252                         1093
## ENSG00000000005.6                             2                            0
## ENSG00000000419.13                         1554                          752
## ENSG00000000457.14                         1259                         1113
## ENSG00000000460.17                          484                          213
## ENSG00000000938.13                         1193                         1106
##                    TCGA-86-8669-01A-11R-2403-07 TCGA-44-A479-01A-31R-A24H-07
## ENSG00000000003.15                         2203                         1889
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                          889                         1941
## ENSG00000000457.14                          535                          548
## ENSG00000000460.17                          188                          301
## ENSG00000000938.13                          632                          879
##                    TCGA-64-1679-01A-21R-2066-07 TCGA-50-6595-11A-01R-1858-07
## ENSG00000000003.15                         5817                         1546
## ENSG00000000005.6                             1                            1
## ENSG00000000419.13                         1664                         1563
## ENSG00000000457.14                          754                          975
## ENSG00000000460.17                          605                          145
## ENSG00000000938.13                         1174                         4125
##                    TCGA-99-8032-01A-11R-2241-07 TCGA-55-6968-01A-11R-1949-07
## ENSG00000000003.15                         2721                         1194
## ENSG00000000005.6                             0                            1
## ENSG00000000419.13                         1993                         1812
## ENSG00000000457.14                          824                          489
## ENSG00000000460.17                          461                          617
## ENSG00000000938.13                          658                          951
##                    TCGA-44-8120-01A-11R-2241-07 TCGA-97-8174-01A-11R-2287-07
## ENSG00000000003.15                         7846                         4413
## ENSG00000000005.6                             0                            1
## ENSG00000000419.13                         2104                         1726
## ENSG00000000457.14                          922                         1895
## ENSG00000000460.17                          268                          411
## ENSG00000000938.13                         1152                         1527
##                    TCGA-38-4632-11A-01R-1755-07 TCGA-38-4632-01A-01R-1755-07
## ENSG00000000003.15                          764                         3401
## ENSG00000000005.6                             1                            0
## ENSG00000000419.13                         1088                         1626
## ENSG00000000457.14                          265                          405
## ENSG00000000460.17                           74                          474
## ENSG00000000938.13                         2501                          685
##                    TCGA-MP-A4SY-01A-21R-A24X-07 TCGA-95-8039-01A-11R-2241-07
## ENSG00000000003.15                         3991                         5552
## ENSG00000000005.6                             2                            4
## ENSG00000000419.13                         2162                         2216
## ENSG00000000457.14                          703                         1103
## ENSG00000000460.17                          492                          425
## ENSG00000000938.13                          755                         1684
##                    TCGA-L9-A8F4-01A-11R-A39D-07 TCGA-55-6971-11A-01R-1949-07
## ENSG00000000003.15                         2714                          706
## ENSG00000000005.6                             0                            1
## ENSG00000000419.13                         1785                          811
## ENSG00000000457.14                          711                          635
## ENSG00000000460.17                          571                          104
## ENSG00000000938.13                         1693                         2901
##                    TCGA-86-8075-01A-11R-2241-07 TCGA-69-7760-01A-11R-2170-07
## ENSG00000000003.15                        11426                        12460
## ENSG00000000005.6                             3                         8122
## ENSG00000000419.13                         2428                         1295
## ENSG00000000457.14                         1236                          575
## ENSG00000000460.17                          888                          309
## ENSG00000000938.13                         1375                          143
##                    TCGA-75-5126-01A-01R-1755-07 TCGA-91-6849-01A-11R-1949-07
## ENSG00000000003.15                         2050                         1853
## ENSG00000000005.6                             4                            1
## ENSG00000000419.13                         1410                          675
## ENSG00000000457.14                          254                          409
## ENSG00000000460.17                          203                           93
## ENSG00000000938.13                         1652                         1364
##                    TCGA-86-7711-01A-11R-2066-07 TCGA-05-4417-01A-22R-1858-07
## ENSG00000000003.15                         4241                         2292
## ENSG00000000005.6                             0                            3
## ENSG00000000419.13                         2643                          654
## ENSG00000000457.14                          950                          460
## ENSG00000000460.17                         1010                          165
## ENSG00000000938.13                         1615                          658
##                    TCGA-97-8552-01A-11R-2403-07 TCGA-93-A4JN-01A-11R-A24X-07
## ENSG00000000003.15                         4123                         3860
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                         1390                         2367
## ENSG00000000457.14                          568                         1158
## ENSG00000000460.17                          116                         1084
## ENSG00000000938.13                         1770                          937
##                    TCGA-05-4389-01A-01R-1206-07 TCGA-64-5815-01A-01R-1628-07
## ENSG00000000003.15                         9134                         1139
## ENSG00000000005.6                             1                            8
## ENSG00000000419.13                         2215                          552
## ENSG00000000457.14                         1106                          234
## ENSG00000000460.17                          770                          127
## ENSG00000000938.13                         1592                          839
##                    TCGA-49-4512-11A-01R-1858-07 TCGA-L9-A50W-01A-12R-A39D-07
## ENSG00000000003.15                         1509                         3010
## ENSG00000000005.6                             2                            0
## ENSG00000000419.13                         1306                         1343
## ENSG00000000457.14                          361                          956
## ENSG00000000460.17                           59                          175
## ENSG00000000938.13                         1744                          675
##                    TCGA-86-8278-01A-11R-2287-07 TCGA-75-6203-01A-11R-1755-07
## ENSG00000000003.15                         8232                         1786
## ENSG00000000005.6                           624                            0
## ENSG00000000419.13                         2642                          737
## ENSG00000000457.14                         1369                          435
## ENSG00000000460.17                          511                          118
## ENSG00000000938.13                         1157                         2878
##                    TCGA-78-8655-01A-11R-2403-07 TCGA-97-8547-01A-11R-2403-07
## ENSG00000000003.15                         3347                         1482
## ENSG00000000005.6                             0                          126
## ENSG00000000419.13                          899                          506
## ENSG00000000457.14                          824                          313
## ENSG00000000460.17                          245                          125
## ENSG00000000938.13                          835                          509
##                    TCGA-55-8097-01A-11R-2241-07 TCGA-73-4668-01A-01R-1206-07
## ENSG00000000003.15                         2818                         8875
## ENSG00000000005.6                             1                            0
## ENSG00000000419.13                         1328                         3908
## ENSG00000000457.14                         1262                          805
## ENSG00000000460.17                          270                          924
## ENSG00000000938.13                         1200                         1227
##                    TCGA-55-7910-01A-11R-2170-07 TCGA-44-2668-01A-01R-A278-07
## ENSG00000000003.15                          923                         1092
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                         1785                         1202
## ENSG00000000457.14                          392                          514
## ENSG00000000460.17                          289                          356
## ENSG00000000938.13                          282                          775
##                    TCGA-55-8301-01A-11R-2287-07 TCGA-55-A490-01A-11R-A466-07
## ENSG00000000003.15                         2121                          735
## ENSG00000000005.6                             2                            0
## ENSG00000000419.13                         1609                          705
## ENSG00000000457.14                         1181                          478
## ENSG00000000460.17                          580                          112
## ENSG00000000938.13                         2021                         1499
##                    TCGA-78-7152-01A-11R-2039-07 TCGA-86-8671-01A-11R-2403-07
## ENSG00000000003.15                         1225                         1811
## ENSG00000000005.6                             1                            1
## ENSG00000000419.13                          340                          865
## ENSG00000000457.14                          347                          788
## ENSG00000000460.17                          136                          195
## ENSG00000000938.13                          398                         2406
##                    TCGA-55-6972-11A-01R-1949-07 TCGA-55-7913-01B-11R-2241-07
## ENSG00000000003.15                          573                         7249
## ENSG00000000005.6                             1                            0
## ENSG00000000419.13                          619                         3144
## ENSG00000000457.14                          370                         1768
## ENSG00000000460.17                           68                         1359
## ENSG00000000938.13                         2599                          798
##                    TCGA-73-4659-01A-01R-1206-07 TCGA-75-5147-01A-01R-1628-07
## ENSG00000000003.15                         5100                        15402
## ENSG00000000005.6                             1                           20
## ENSG00000000419.13                         2394                         3402
## ENSG00000000457.14                          939                         1474
## ENSG00000000460.17                          673                          863
## ENSG00000000938.13                         2717                         1484
##                    TCGA-44-2662-11A-01R-1758-07 TCGA-05-4403-01A-01R-1206-07
## ENSG00000000003.15                         1120                         3759
## ENSG00000000005.6                             7                            3
## ENSG00000000419.13                          980                         2482
## ENSG00000000457.14                          343                         1322
## ENSG00000000460.17                           72                          229
## ENSG00000000938.13                         4442                         3772
##                    TCGA-91-6831-11A-02R-1858-07 TCGA-38-4627-11A-01R-1758-07
## ENSG00000000003.15                          348                         1229
## ENSG00000000005.6                             0                            1
## ENSG00000000419.13                          752                         1703
## ENSG00000000457.14                          211                          345
## ENSG00000000460.17                           38                          122
## ENSG00000000938.13                         1506                         3772
##                    TCGA-53-7626-01A-12R-2066-07 TCGA-55-7728-01A-11R-2187-07
## ENSG00000000003.15                         3249                         1998
## ENSG00000000005.6                             0                            2
## ENSG00000000419.13                         1969                          909
## ENSG00000000457.14                         1345                         1281
## ENSG00000000460.17                          427                          191
## ENSG00000000938.13                         2713                        11350
##                    TCGA-91-6840-01A-11R-1949-07 TCGA-55-8507-01A-11R-2403-07
## ENSG00000000003.15                         4820                         2585
## ENSG00000000005.6                             3                            0
## ENSG00000000419.13                         1405                         1542
## ENSG00000000457.14                          459                          664
## ENSG00000000460.17                          398                          454
## ENSG00000000938.13                          408                          895
##                    TCGA-69-7973-01A-11R-2187-07 TCGA-44-5645-01A-01R-A278-07
## ENSG00000000003.15                         4156                         7077
## ENSG00000000005.6                             0                            8
## ENSG00000000419.13                         2481                         1657
## ENSG00000000457.14                         1271                         1611
## ENSG00000000460.17                         1147                          405
## ENSG00000000938.13                          547                          495
##                    TCGA-62-A46S-01A-11R-A24H-07 TCGA-44-5645-01A-01R-1628-07
## ENSG00000000003.15                         5963                         5638
## ENSG00000000005.6                             0                            1
## ENSG00000000419.13                         1075                         1208
## ENSG00000000457.14                          979                          883
## ENSG00000000460.17                          222                          211
## ENSG00000000938.13                          856                          736
##                    TCGA-86-A4JF-01A-11R-A24X-07 TCGA-55-8087-01A-11R-2241-07
## ENSG00000000003.15                         1213                         4201
## ENSG00000000005.6                             0                            1
## ENSG00000000419.13                         1359                         1793
## ENSG00000000457.14                          805                         2799
## ENSG00000000460.17                          667                          432
## ENSG00000000938.13                         1226                         1042
##                    TCGA-05-5715-01A-01R-1628-07 TCGA-50-6593-01A-11R-1755-07
## ENSG00000000003.15                         3190                         5471
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                          496                          617
## ENSG00000000457.14                          555                          390
## ENSG00000000460.17                          243                          223
## ENSG00000000938.13                          994                         1079
##                    TCGA-97-A4LX-01A-11R-A24X-07 TCGA-44-2666-01A-01R-0946-07
## ENSG00000000003.15                         2279                         2692
## ENSG00000000005.6                             0                            1
## ENSG00000000419.13                         1583                         2287
## ENSG00000000457.14                          804                         1122
## ENSG00000000460.17                          303                          449
## ENSG00000000938.13                         2305                          647
##                    TCGA-44-2666-01A-01R-A278-07 TCGA-55-6985-11A-01R-1949-07
## ENSG00000000003.15                         1768                         2794
## ENSG00000000005.6                             2                            0
## ENSG00000000419.13                         1703                         1659
## ENSG00000000457.14                          997                          905
## ENSG00000000460.17                          381                          204
## ENSG00000000938.13                          248                         7083
##                    TCGA-86-8668-01A-11R-2403-07 TCGA-69-7978-01A-11R-2187-07
## ENSG00000000003.15                         1370                         3153
## ENSG00000000005.6                             0                            3
## ENSG00000000419.13                          614                         2063
## ENSG00000000457.14                          408                          683
## ENSG00000000460.17                          130                          375
## ENSG00000000938.13                          510                         2700
##                    TCGA-49-AAR2-01A-11R-A39D-07 TCGA-44-A4SS-01A-11R-A24X-07
## ENSG00000000003.15                         2620                         3623
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                          587                         1388
## ENSG00000000457.14                          369                          688
## ENSG00000000460.17                          166                          452
## ENSG00000000938.13                          575                         1312
##                    TCGA-78-7146-01A-11R-2039-07 TCGA-44-3398-01A-01R-1107-07
## ENSG00000000003.15                         3428                         8235
## ENSG00000000005.6                             1                          129
## ENSG00000000419.13                         1453                         2969
## ENSG00000000457.14                          497                          954
## ENSG00000000460.17                          753                          434
## ENSG00000000938.13                          521                         1646
##                    TCGA-44-3398-11B-01R-1758-07 TCGA-64-5774-01A-01R-1628-07
## ENSG00000000003.15                         1988                         4884
## ENSG00000000005.6                             6                            0
## ENSG00000000419.13                         1520                         1736
## ENSG00000000457.14                          815                          647
## ENSG00000000460.17                          170                          543
## ENSG00000000938.13                         6303                          245
##                    TCGA-55-8616-01A-11R-2403-07 TCGA-55-7570-01A-11R-2039-07
## ENSG00000000003.15                         2718                         2325
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                         1165                         2313
## ENSG00000000457.14                          639                          776
## ENSG00000000460.17                          293                         1012
## ENSG00000000938.13                          525                          296
##                    TCGA-38-7271-01A-11R-2039-07 TCGA-55-7914-01A-11R-2170-07
## ENSG00000000003.15                         1536                         2936
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                          765                         1447
## ENSG00000000457.14                          510                          827
## ENSG00000000460.17                          172                          241
## ENSG00000000938.13                          911                          343
##                    TCGA-55-1595-01A-01R-0946-07 TCGA-49-6742-01A-11R-1858-07
## ENSG00000000003.15                         2784                         3257
## ENSG00000000005.6                             4                            0
## ENSG00000000419.13                         2321                          918
## ENSG00000000457.14                         1529                          706
## ENSG00000000460.17                          637                          219
## ENSG00000000938.13                         1087                          283
##                    TCGA-49-6742-11A-01R-1858-07 TCGA-38-4630-01A-01R-1206-07
## ENSG00000000003.15                         1688                        13245
## ENSG00000000005.6                             4                           77
## ENSG00000000419.13                          982                         3874
## ENSG00000000457.14                          413                         2285
## ENSG00000000460.17                           85                         2616
## ENSG00000000938.13                         2118                          657
##                    TCGA-44-2661-01A-01R-1107-07 TCGA-55-1592-01A-01R-0946-07
## ENSG00000000003.15                         4558                         8260
## ENSG00000000005.6                             0                            6
## ENSG00000000419.13                         2089                         3235
## ENSG00000000457.14                         1025                         1587
## ENSG00000000460.17                          315                          779
## ENSG00000000938.13                         1760                         2943
##                    TCGA-50-5932-01A-11R-1755-07 TCGA-55-1594-01A-01R-0946-07
## ENSG00000000003.15                         4545                        10056
## ENSG00000000005.6                             0                            1
## ENSG00000000419.13                         1557                         6290
## ENSG00000000457.14                         1185                          672
## ENSG00000000460.17                          322                          768
## ENSG00000000938.13                          424                          633
##                    TCGA-44-7661-01A-11R-2066-07 TCGA-44-6147-11A-01R-1858-07
## ENSG00000000003.15                         2138                          849
## ENSG00000000005.6                             4                            1
## ENSG00000000419.13                         2179                         1237
## ENSG00000000457.14                          830                          290
## ENSG00000000460.17                          610                           95
## ENSG00000000938.13                         2447                         4712
##                    TCGA-44-6147-01A-11R-1755-07 TCGA-05-4384-01A-01R-1755-07
## ENSG00000000003.15                         2834                         1343
## ENSG00000000005.6                             2                            0
## ENSG00000000419.13                         1307                          907
## ENSG00000000457.14                          715                          823
## ENSG00000000460.17                          285                          189
## ENSG00000000938.13                         1045                         1047
##                    TCGA-44-6147-01A-11R-A278-07 TCGA-80-5607-01A-31R-1949-07
## ENSG00000000003.15                         2178                         1901
## ENSG00000000005.6                             4                            6
## ENSG00000000419.13                         1430                          961
## ENSG00000000457.14                         1139                          225
## ENSG00000000460.17                          434                          169
## ENSG00000000938.13                          606                          447
##                    TCGA-50-5072-01A-21R-1858-07 TCGA-05-4396-01A-21R-1858-07
## ENSG00000000003.15                         2752                         1107
## ENSG00000000005.6                             1                            0
## ENSG00000000419.13                         1162                          776
## ENSG00000000457.14                         1088                          495
## ENSG00000000460.17                          888                          115
## ENSG00000000938.13                          692                          135
##                    TCGA-67-3770-01A-01R-0946-07 TCGA-73-4676-01A-01R-1755-07
## ENSG00000000003.15                         3661                         4702
## ENSG00000000005.6                            18                            0
## ENSG00000000419.13                          662                         1571
## ENSG00000000457.14                          436                          432
## ENSG00000000460.17                           96                          488
## ENSG00000000938.13                          471                          738
##                    TCGA-50-5939-01A-11R-1628-07 TCGA-91-6836-11A-01R-1858-07
## ENSG00000000003.15                         1588                          743
## ENSG00000000005.6                             1                            0
## ENSG00000000419.13                          816                          739
## ENSG00000000457.14                          359                          405
## ENSG00000000460.17                          193                           60
## ENSG00000000938.13                          783                         3111
##                    TCGA-91-6836-01A-21R-1858-07 TCGA-05-4434-01A-01R-1206-07
## ENSG00000000003.15                         1538                         3342
## ENSG00000000005.6                             2                            0
## ENSG00000000419.13                         2094                         2713
## ENSG00000000457.14                          403                         1920
## ENSG00000000460.17                          415                          953
## ENSG00000000938.13                          433                         4108
##                    TCGA-55-8621-01A-11R-2403-07 TCGA-05-4402-01A-01R-1206-07
## ENSG00000000003.15                         1567                        12949
## ENSG00000000005.6                             1                            6
## ENSG00000000419.13                         1007                         2197
## ENSG00000000457.14                          541                         1530
## ENSG00000000460.17                          203                          701
## ENSG00000000938.13                         2945                         1531
##                    TCGA-44-3917-01A-01R-A278-07 TCGA-73-4677-01A-01R-1206-07
## ENSG00000000003.15                         2436                         4473
## ENSG00000000005.6                             2                            0
## ENSG00000000419.13                         1918                         2253
## ENSG00000000457.14                          895                         1465
## ENSG00000000460.17                          780                          348
## ENSG00000000938.13                          126                         1150
##                    TCGA-67-3773-01A-01R-0946-07 TCGA-91-6848-01A-11R-1949-07
## ENSG00000000003.15                          848                         1028
## ENSG00000000005.6                             1                            1
## ENSG00000000419.13                          797                          997
## ENSG00000000457.14                          446                          216
## ENSG00000000460.17                           95                          302
## ENSG00000000938.13                          370                          991
##                    TCGA-50-6592-01A-11R-1755-07 TCGA-86-7954-01A-11R-2187-07
## ENSG00000000003.15                         3334                         9079
## ENSG00000000005.6                             1                            3
## ENSG00000000419.13                         2504                         3519
## ENSG00000000457.14                          657                         1630
## ENSG00000000460.17                          621                         1097
## ENSG00000000938.13                         1294                         1745
##                    TCGA-55-6981-01A-11R-1949-07 TCGA-86-8054-01A-11R-2241-07
## ENSG00000000003.15                         2982                         8681
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                          657                         3598
## ENSG00000000457.14                          341                         1030
## ENSG00000000460.17                          179                         1492
## ENSG00000000938.13                          340                          544
##                    TCGA-J2-A4AD-01A-11R-A24H-07 TCGA-62-A46P-01A-11R-A24H-07
## ENSG00000000003.15                         3719                         1766
## ENSG00000000005.6                             2                            0
## ENSG00000000419.13                         2154                         1661
## ENSG00000000457.14                         1319                          475
## ENSG00000000460.17                          563                           95
## ENSG00000000938.13                          357                          368
##                    TCGA-86-7955-01A-11R-2187-07 TCGA-50-6590-01A-12R-1858-07
## ENSG00000000003.15                         4279                          544
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                         3773                         1174
## ENSG00000000457.14                          757                          309
## ENSG00000000460.17                          685                          382
## ENSG00000000938.13                          236                         1052
##                    TCGA-97-8179-01A-11R-2287-07 TCGA-91-A4BD-01A-11R-A24H-07
## ENSG00000000003.15                         6146                         1443
## ENSG00000000005.6                             5                            0
## ENSG00000000419.13                         2211                         1275
## ENSG00000000457.14                         1158                          230
## ENSG00000000460.17                          632                          104
## ENSG00000000938.13                          720                          930
##                    TCGA-50-5931-11A-01R-1858-07 TCGA-78-7162-01A-21R-2066-07
## ENSG00000000003.15                          577                         2960
## ENSG00000000005.6                             1                            1
## ENSG00000000419.13                          941                         1384
## ENSG00000000457.14                          275                         1039
## ENSG00000000460.17                           36                          204
## ENSG00000000938.13                         2662                          935
##                    TCGA-50-5931-01A-11R-1755-07 TCGA-78-8662-01A-11R-2403-07
## ENSG00000000003.15                         1584                         2347
## ENSG00000000005.6                             2                            2
## ENSG00000000419.13                         1594                          603
## ENSG00000000457.14                          835                          469
## ENSG00000000460.17                          510                          301
## ENSG00000000938.13                          114                          240
##                    TCGA-75-5125-01A-01R-1755-07 TCGA-55-8512-01A-11R-2403-07
## ENSG00000000003.15                         3214                         3334
## ENSG00000000005.6                            18                            8
## ENSG00000000419.13                         1552                         1317
## ENSG00000000457.14                          414                          644
## ENSG00000000460.17                          316                          120
## ENSG00000000938.13                         1211                         1241
##                    TCGA-73-A9RS-01A-11R-A41B-07 TCGA-69-7764-01A-11R-2170-07
## ENSG00000000003.15                          583                          857
## ENSG00000000005.6                             1                            0
## ENSG00000000419.13                         1436                         1000
## ENSG00000000457.14                          482                          863
## ENSG00000000460.17                          278                          346
## ENSG00000000938.13                          248                          236
##                    TCGA-L9-A5IP-01A-21R-A39D-07 TCGA-55-6987-01A-11R-1949-07
## ENSG00000000003.15                         1113                         4509
## ENSG00000000005.6                             0                            3
## ENSG00000000419.13                         1180                         2182
## ENSG00000000457.14                          982                         1523
## ENSG00000000460.17                          642                          819
## ENSG00000000938.13                          361                         3999
##                    TCGA-38-A44F-01A-11R-A24H-07 TCGA-NJ-A4YI-01A-11R-A262-07
## ENSG00000000003.15                         2450                          662
## ENSG00000000005.6                             0                          107
## ENSG00000000419.13                         1360                          919
## ENSG00000000457.14                          591                          557
## ENSG00000000460.17                          119                          193
## ENSG00000000938.13                         1854                          843
##                    TCGA-62-8398-01A-11R-2326-07 TCGA-55-6978-01A-11R-1949-07
## ENSG00000000003.15                         4410                         4422
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                         1630                         2578
## ENSG00000000457.14                          940                          794
## ENSG00000000460.17                          763                          879
## ENSG00000000938.13                          907                         1896
##                    TCGA-93-A4JP-01A-11R-A24X-07 TCGA-75-6214-01A-41R-1949-07
## ENSG00000000003.15                         4887                         4375
## ENSG00000000005.6                             5                            2
## ENSG00000000419.13                         1677                         4095
## ENSG00000000457.14                          982                          934
## ENSG00000000460.17                          509                          673
## ENSG00000000938.13                         2570                          425
##                    TCGA-S2-AA1A-01A-12R-A39D-07 TCGA-44-6777-11A-01R-1858-07
## ENSG00000000003.15                         4735                          689
## ENSG00000000005.6                             2                            2
## ENSG00000000419.13                         1031                          653
## ENSG00000000457.14                          765                          269
## ENSG00000000460.17                          152                           57
## ENSG00000000938.13                          783                         2924
##                    TCGA-44-6774-01A-21R-1858-07 TCGA-55-6982-01A-11R-1949-07
## ENSG00000000003.15                         1403                         2291
## ENSG00000000005.6                             1                            2
## ENSG00000000419.13                          662                          586
## ENSG00000000457.14                          256                          384
## ENSG00000000460.17                          158                          206
## ENSG00000000938.13                          372                          373
##                    TCGA-05-5423-01A-01R-1628-07 TCGA-55-A57B-01A-12R-A39D-07
## ENSG00000000003.15                         4477                         5052
## ENSG00000000005.6                            12                            7
## ENSG00000000419.13                         2813                         1288
## ENSG00000000457.14                         1014                         1049
## ENSG00000000460.17                          525                          320
## ENSG00000000938.13                          742                         1272
##                    TCGA-L9-A443-01A-12R-A24H-07 TCGA-05-4430-01A-02R-1206-07
## ENSG00000000003.15                         2101                         6642
## ENSG00000000005.6                             0                            1
## ENSG00000000419.13                         1078                         2471
## ENSG00000000457.14                          750                          806
## ENSG00000000460.17                          237                          495
## ENSG00000000938.13                          484                         1613
##                    TCGA-55-A4DG-01A-11R-A24H-07 TCGA-55-8506-01A-11R-2403-07
## ENSG00000000003.15                          788                         1597
## ENSG00000000005.6                             0                            1
## ENSG00000000419.13                         1386                         1124
## ENSG00000000457.14                         1305                          538
## ENSG00000000460.17                          228                          292
## ENSG00000000938.13                          492                          531
##                    TCGA-05-5428-01A-01R-1628-07 TCGA-44-7672-01A-11R-2066-07
## ENSG00000000003.15                         2096                         4242
## ENSG00000000005.6                             0                           16
## ENSG00000000419.13                         3780                         1827
## ENSG00000000457.14                          698                          661
## ENSG00000000460.17                          594                          405
## ENSG00000000938.13                          254                         2584
##                    TCGA-64-5781-01A-01R-1628-07 TCGA-05-4422-01A-01R-1206-07
## ENSG00000000003.15                         1826                         4911
## ENSG00000000005.6                             1                            1
## ENSG00000000419.13                         1309                         1213
## ENSG00000000457.14                          521                         2786
## ENSG00000000460.17                          238                          753
## ENSG00000000938.13                          245                          858
##                    TCGA-44-2656-01A-02R-0946-07 TCGA-78-7535-01A-11R-2066-07
## ENSG00000000003.15                         6039                         4813
## ENSG00000000005.6                             1                            0
## ENSG00000000419.13                         2198                         2160
## ENSG00000000457.14                         1767                          785
## ENSG00000000460.17                          689                          305
## ENSG00000000938.13                         2550                         3024
##                    TCGA-86-8358-01A-11R-2326-07 TCGA-62-8402-01A-11R-2326-07
## ENSG00000000003.15                          669                         6379
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                          889                         2002
## ENSG00000000457.14                          442                          658
## ENSG00000000460.17                          662                          345
## ENSG00000000938.13                          247                         1287
##                    TCGA-MP-A4T7-01A-11R-A24X-07 TCGA-50-8460-01A-11R-2326-07
## ENSG00000000003.15                         1456                         2120
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                         1851                         1161
## ENSG00000000457.14                          650                          432
## ENSG00000000460.17                          378                          126
## ENSG00000000938.13                         1686                          981
##                    TCGA-69-8253-01A-11R-2287-07 TCGA-49-6761-01A-31R-1949-07
## ENSG00000000003.15                         2492                         3950
## ENSG00000000005.6                             2                           10
## ENSG00000000419.13                         3880                          608
## ENSG00000000457.14                         1100                          375
## ENSG00000000460.17                          204                          210
## ENSG00000000938.13                         1036                          595
##                    TCGA-L9-A444-01A-21R-A24H-07 TCGA-49-6761-11A-01R-1949-07
## ENSG00000000003.15                         1040                          696
## ENSG00000000005.6                             1                            0
## ENSG00000000419.13                          859                          558
## ENSG00000000457.14                          597                          290
## ENSG00000000460.17                          151                           54
## ENSG00000000938.13                          901                         2471
##                    TCGA-55-7227-01A-11R-2039-07 TCGA-44-7669-01A-21R-2066-07
## ENSG00000000003.15                         2235                         3352
## ENSG00000000005.6                            18                            0
## ENSG00000000419.13                          918                         2922
## ENSG00000000457.14                          488                         1147
## ENSG00000000460.17                          187                         1262
## ENSG00000000938.13                         1107                         1333
##                    TCGA-75-7031-01A-11R-1949-07 TCGA-78-7143-01A-11R-2039-07
## ENSG00000000003.15                         1703                         4537
## ENSG00000000005.6                             0                            3
## ENSG00000000419.13                          702                         1202
## ENSG00000000457.14                          457                         1099
## ENSG00000000460.17                          178                          568
## ENSG00000000938.13                          494                          972
##                    TCGA-55-8092-01A-11R-2241-07 TCGA-49-4507-01A-01R-1206-07
## ENSG00000000003.15                         4028                         2096
## ENSG00000000005.6                             1                            0
## ENSG00000000419.13                         3934                         1665
## ENSG00000000457.14                         1783                          403
## ENSG00000000460.17                          881                          270
## ENSG00000000938.13                          702                          565
##                    TCGA-73-4666-01A-01R-1206-07 TCGA-91-8499-01A-11R-2403-07
## ENSG00000000003.15                         4382                        13000
## ENSG00000000005.6                             7                            1
## ENSG00000000419.13                         2336                         2073
## ENSG00000000457.14                         1320                          426
## ENSG00000000460.17                         1540                          624
## ENSG00000000938.13                         1939                          654
##                    TCGA-86-8359-01A-11R-2326-07 TCGA-50-6595-01A-12R-1858-07
## ENSG00000000003.15                         2085                         1329
## ENSG00000000005.6                             2                            0
## ENSG00000000419.13                         1867                         1395
## ENSG00000000457.14                          605                          297
## ENSG00000000460.17                          227                          320
## ENSG00000000938.13                          659                          387
##                    TCGA-55-6968-11A-01R-1949-07 TCGA-44-7667-01A-31R-2066-07
## ENSG00000000003.15                          548                         2366
## ENSG00000000005.6                             1                            1
## ENSG00000000419.13                          709                         2760
## ENSG00000000457.14                          399                          807
## ENSG00000000460.17                           92                         1011
## ENSG00000000938.13                         3718                          491
##                    TCGA-99-8033-01A-11R-2241-07 TCGA-62-8397-01A-11R-2326-07
## ENSG00000000003.15                         2519                         1233
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                         3146                          411
## ENSG00000000457.14                         1741                          272
## ENSG00000000460.17                         1144                           34
## ENSG00000000938.13                         1340                          285
##                    TCGA-MP-A4TA-01A-21R-A24X-07 TCGA-55-8510-01A-11R-2403-07
## ENSG00000000003.15                         2165                         2611
## ENSG00000000005.6                             1                            0
## ENSG00000000419.13                         2351                          828
## ENSG00000000457.14                          776                          503
## ENSG00000000460.17                          960                          332
## ENSG00000000938.13                          744                         2151
##                    TCGA-50-6591-01A-11R-1755-07 TCGA-05-5425-01A-02R-1628-07
## ENSG00000000003.15                         2307                         2393
## ENSG00000000005.6                          1295                            0
## ENSG00000000419.13                         2030                         2238
## ENSG00000000457.14                          631                          640
## ENSG00000000460.17                          823                          468
## ENSG00000000938.13                          115                         1232
##                    TCGA-49-AARR-01A-11R-A41B-07 TCGA-55-7574-01A-11R-2039-07
## ENSG00000000003.15                         3859                          996
## ENSG00000000005.6                             2                            0
## ENSG00000000419.13                         1149                          737
## ENSG00000000457.14                          600                          478
## ENSG00000000460.17                          123                          275
## ENSG00000000938.13                         1524                          827
##                    TCGA-44-7662-01A-11R-2066-07 TCGA-69-7979-01A-11R-2187-07
## ENSG00000000003.15                         3513                         1683
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                         3093                         2845
## ENSG00000000457.14                         1291                          957
## ENSG00000000460.17                         1794                         1233
## ENSG00000000938.13                         1332                          998
##                    TCGA-55-8614-01A-11R-2403-07 TCGA-69-7980-01A-11R-2187-07
## ENSG00000000003.15                         3630                         4915
## ENSG00000000005.6                             2                            0
## ENSG00000000419.13                         1122                         2023
## ENSG00000000457.14                          399                         1156
## ENSG00000000460.17                          289                          670
## ENSG00000000938.13                          338                         1463
##                    TCGA-55-A494-01A-11R-A24X-07 TCGA-73-4675-01A-01R-1206-07
## ENSG00000000003.15                         1305                         5937
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                          732                         1435
## ENSG00000000457.14                          929                         1093
## ENSG00000000460.17                          344                          254
## ENSG00000000938.13                          160                         1487
##                    TCGA-05-4390-01A-02R-1755-07 TCGA-97-7941-01A-11R-2187-07
## ENSG00000000003.15                         2311                        11528
## ENSG00000000005.6                             0                           13
## ENSG00000000419.13                         1331                         1547
## ENSG00000000457.14                          385                         1257
## ENSG00000000460.17                          499                          232
## ENSG00000000938.13                          540                         1465
##                    TCGA-05-4433-01A-22R-1858-07 TCGA-69-7765-01A-11R-2170-07
## ENSG00000000003.15                          870                         2063
## ENSG00000000005.6                             0                            1
## ENSG00000000419.13                          734                         1209
## ENSG00000000457.14                          672                          717
## ENSG00000000460.17                          184                          251
## ENSG00000000938.13                         1939                          609
##                    TCGA-86-8672-01A-21R-2403-07 TCGA-49-4506-01A-01R-1206-07
## ENSG00000000003.15                         1329                         1998
## ENSG00000000005.6                             1                            3
## ENSG00000000419.13                         1259                         1969
## ENSG00000000457.14                          274                          983
## ENSG00000000460.17                          190                          665
## ENSG00000000938.13                          917                          576
##                    TCGA-MP-A4TE-01A-22R-A466-07 TCGA-55-8508-01A-11R-2403-07
## ENSG00000000003.15                         4548                         1431
## ENSG00000000005.6                             4                            0
## ENSG00000000419.13                         1531                          764
## ENSG00000000457.14                          704                          513
## ENSG00000000460.17                          295                          258
## ENSG00000000938.13                          376                          693
##                    TCGA-44-2668-01B-02R-A277-07 TCGA-75-6206-01A-11R-1755-07
## ENSG00000000003.15                          613                         1198
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                          789                          815
## ENSG00000000457.14                          889                          680
## ENSG00000000460.17                          577                          146
## ENSG00000000938.13                         1914                         1094
##                    TCGA-50-5946-01A-11R-1755-07 TCGA-97-A4M1-01A-11R-A24X-07
## ENSG00000000003.15                         3601                         2633
## ENSG00000000005.6                             2                            5
## ENSG00000000419.13                         2721                         1583
## ENSG00000000457.14                         1287                          908
## ENSG00000000460.17                         1075                          246
## ENSG00000000938.13                          281                         1402
##                    TCGA-55-6972-01A-11R-1949-07 TCGA-64-1681-01A-11R-2066-07
## ENSG00000000003.15                         1575                         8534
## ENSG00000000005.6                             0                            1
## ENSG00000000419.13                         1819                         2291
## ENSG00000000457.14                          917                          886
## ENSG00000000460.17                          173                          419
## ENSG00000000938.13                           38                         1134
##                    TCGA-97-A4M7-01A-11R-A24X-07 TCGA-44-2662-01A-01R-A278-07
## ENSG00000000003.15                         3582                         3666
## ENSG00000000005.6                             1                            5
## ENSG00000000419.13                         1222                         1861
## ENSG00000000457.14                          789                          647
## ENSG00000000460.17                          344                          959
## ENSG00000000938.13                         1746                         1275
##                    TCGA-38-4628-01A-01R-1206-07 TCGA-55-7283-01A-11R-2039-07
## ENSG00000000003.15                         3942                         2504
## ENSG00000000005.6                             1                            3
## ENSG00000000419.13                         3034                         1060
## ENSG00000000457.14                         1280                          631
## ENSG00000000460.17                          913                          316
## ENSG00000000938.13                          602                          657
##                    TCGA-44-2662-01A-01R-0946-07 TCGA-49-4488-01A-01R-1755-07
## ENSG00000000003.15                         5758                         1848
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                         2608                         1264
## ENSG00000000457.14                          666                          590
## ENSG00000000460.17                         1221                          172
## ENSG00000000938.13                         3435                          579
##                    TCGA-50-8457-01A-11R-2326-07 TCGA-86-6562-01A-11R-1755-07
## ENSG00000000003.15                         1815                         3136
## ENSG00000000005.6                             4                            0
## ENSG00000000419.13                          999                         1460
## ENSG00000000457.14                          918                          950
## ENSG00000000460.17                          263                          531
## ENSG00000000938.13                         1054                          586
##                    TCGA-75-7030-01A-11R-1949-07 TCGA-55-7815-01A-11R-2170-07
## ENSG00000000003.15                         1523                         2508
## ENSG00000000005.6                             1                            0
## ENSG00000000419.13                          388                          775
## ENSG00000000457.14                          329                          466
## ENSG00000000460.17                           69                          172
## ENSG00000000938.13                          432                          303
##                    TCGA-J2-A4AG-01A-11R-A24H-07 TCGA-44-2666-01B-02R-A277-07
## ENSG00000000003.15                         2607                          801
## ENSG00000000005.6                           148                           16
## ENSG00000000419.13                         1051                          469
## ENSG00000000457.14                          565                          877
## ENSG00000000460.17                          284                          377
## ENSG00000000938.13                          874                           86
##                    TCGA-78-7542-01A-21R-2066-07 TCGA-62-A472-01A-11R-A24H-07
## ENSG00000000003.15                         2326                          908
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                         3111                          722
## ENSG00000000457.14                          969                          412
## ENSG00000000460.17                         1208                          113
## ENSG00000000938.13                          382                          398
##                    TCGA-69-8254-01A-11R-2287-07 TCGA-55-6712-01A-11R-1858-07
## ENSG00000000003.15                        10178                         1849
## ENSG00000000005.6                            10                            1
## ENSG00000000419.13                         2362                          812
## ENSG00000000457.14                         3230                          406
## ENSG00000000460.17                          505                          186
## ENSG00000000938.13                         1773                          535
##                    TCGA-44-2657-11A-01R-1758-07 TCGA-44-2657-01A-01R-1107-07
## ENSG00000000003.15                          881                         2650
## ENSG00000000005.6                             1                            5
## ENSG00000000419.13                          941                         1322
## ENSG00000000457.14                          536                          661
## ENSG00000000460.17                          144                          249
## ENSG00000000938.13                         2954                         1125
##                    TCGA-MP-A4TI-01A-21R-A24X-07 TCGA-55-8505-01A-11R-2403-07
## ENSG00000000003.15                         1717                         1501
## ENSG00000000005.6                             1                            0
## ENSG00000000419.13                          755                         1160
## ENSG00000000457.14                          608                          473
## ENSG00000000460.17                          456                          282
## ENSG00000000938.13                         3008                          346
##                    TCGA-49-4510-01A-01R-1206-07 TCGA-44-2665-11A-01R-1758-07
## ENSG00000000003.15                         6084                         1745
## ENSG00000000005.6                             0                            2
## ENSG00000000419.13                         2553                         1381
## ENSG00000000457.14                          856                          574
## ENSG00000000460.17                          300                          162
## ENSG00000000938.13                         1005                         3844
##                    TCGA-64-1676-01A-01R-0946-07 TCGA-44-2665-01A-01R-0946-07
## ENSG00000000003.15                         5889                         4236
## ENSG00000000005.6                             0                         1321
## ENSG00000000419.13                         2845                         1799
## ENSG00000000457.14                          476                         1301
## ENSG00000000460.17                          252                          376
## ENSG00000000938.13                          742                          823
##                    TCGA-97-7937-01A-11R-2170-07 TCGA-05-4418-01A-01R-1206-07
## ENSG00000000003.15                         4337                         7666
## ENSG00000000005.6                             0                            1
## ENSG00000000419.13                         3201                         1574
## ENSG00000000457.14                         1367                          948
## ENSG00000000460.17                          625                          424
## ENSG00000000938.13                          307                         1210
##                    TCGA-86-8076-01A-31R-2241-07 TCGA-55-8091-01A-11R-2241-07
## ENSG00000000003.15                         3269                         3632
## ENSG00000000005.6                             0                            1
## ENSG00000000419.13                         1540                         1796
## ENSG00000000457.14                         1171                          774
## ENSG00000000460.17                          318                          228
## ENSG00000000938.13                         1915                         1171
##                    TCGA-38-4631-01A-01R-1755-07 TCGA-44-2655-11A-01R-1758-07
## ENSG00000000003.15                         2567                         2497
## ENSG00000000005.6                             1                            7
## ENSG00000000419.13                         1771                         1525
## ENSG00000000457.14                          354                          803
## ENSG00000000460.17                          488                          165
## ENSG00000000938.13                          343                         3215
##                    TCGA-44-2659-01A-01R-0946-07 TCGA-55-7725-01A-11R-2170-07
## ENSG00000000003.15                         1845                          942
## ENSG00000000005.6                             9                            2
## ENSG00000000419.13                         1669                          878
## ENSG00000000457.14                         1203                          635
## ENSG00000000460.17                          293                          150
## ENSG00000000938.13                         1004                          589
##                    TCGA-38-4629-01A-02R-1206-07 TCGA-05-4382-01A-01R-1206-07
## ENSG00000000003.15                         7434                         3887
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                         2968                         2823
## ENSG00000000457.14                          555                         1024
## ENSG00000000460.17                          643                          724
## ENSG00000000938.13                         2145                         4651
##                    TCGA-73-4676-11A-01R-1755-07 TCGA-44-6146-01B-04R-A277-07
## ENSG00000000003.15                          823                          721
## ENSG00000000005.6                             1                           14
## ENSG00000000419.13                          923                          650
## ENSG00000000457.14                          230                          773
## ENSG00000000460.17                           76                          343
## ENSG00000000938.13                         3373                           81
##                    TCGA-38-6178-01A-11R-1755-07 TCGA-55-7573-01A-11R-2039-07
## ENSG00000000003.15                         2182                         3083
## ENSG00000000005.6                             0                            1
## ENSG00000000419.13                          737                          808
## ENSG00000000457.14                          376                          767
## ENSG00000000460.17                          142                          276
## ENSG00000000938.13                          387                          532
##                    TCGA-50-5935-01A-11R-1755-07 TCGA-55-8207-01A-11R-2241-07
## ENSG00000000003.15                         2190                         4227
## ENSG00000000005.6                             2                            1
## ENSG00000000419.13                         1125                         2297
## ENSG00000000457.14                         1122                         1052
## ENSG00000000460.17                          357                          416
## ENSG00000000938.13                          560                         2089
##                    TCGA-50-5935-11A-01R-1858-07 TCGA-50-5051-01A-21R-1858-07
## ENSG00000000003.15                         1189                         2627
## ENSG00000000005.6                             7                            1
## ENSG00000000419.13                          709                          602
## ENSG00000000457.14                          361                          452
## ENSG00000000460.17                           54                          171
## ENSG00000000938.13                         1815                          283
##                    TCGA-44-3917-01B-02R-A277-07 TCGA-78-7540-01A-11R-2066-07
## ENSG00000000003.15                          854                         1919
## ENSG00000000005.6                             9                            0
## ENSG00000000419.13                          689                         2082
## ENSG00000000457.14                          789                          899
## ENSG00000000460.17                          777                          262
## ENSG00000000938.13                          134                         2432
##                    TCGA-NJ-A7XG-01A-12R-A39D-07 TCGA-55-7576-01A-11R-2066-07
## ENSG00000000003.15                         5137                         4196
## ENSG00000000005.6                             0                            1
## ENSG00000000419.13                         1429                         2406
## ENSG00000000457.14                          831                         2073
## ENSG00000000460.17                          244                         1106
## ENSG00000000938.13                          156                         1454
##                    TCGA-64-5775-01A-01R-1628-07 TCGA-55-8203-01A-11R-2241-07
## ENSG00000000003.15                         3577                         4144
## ENSG00000000005.6                            13                            0
## ENSG00000000419.13                         1206                         2439
## ENSG00000000457.14                          263                         1644
## ENSG00000000460.17                          172                          670
## ENSG00000000938.13                          758                         1364
##                    TCGA-55-6986-01A-11R-1949-07 TCGA-86-6851-01A-11R-1949-07
## ENSG00000000003.15                         3399                         2918
## ENSG00000000005.6                             6                            0
## ENSG00000000419.13                          967                         2183
## ENSG00000000457.14                          407                         1616
## ENSG00000000460.17                          107                          781
## ENSG00000000938.13                          423                         2165
##                    TCGA-49-4505-01A-01R-1206-07 TCGA-73-4662-01A-01R-1206-07
## ENSG00000000003.15                         7867                         6839
## ENSG00000000005.6                            44                          150
## ENSG00000000419.13                         3030                         2512
## ENSG00000000457.14                         1011                         2889
## ENSG00000000460.17                          203                         1865
## ENSG00000000938.13                         1878                         2257
##                    TCGA-55-6981-11A-01R-1949-07 TCGA-38-4625-01A-01R-1206-07
## ENSG00000000003.15                          835                        10120
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                          572                         6714
## ENSG00000000457.14                          274                         1006
## ENSG00000000460.17                           93                         2088
## ENSG00000000938.13                         1564                         1873
##                    TCGA-55-7284-01B-11R-2241-07 TCGA-38-4625-11A-01R-1758-07
## ENSG00000000003.15                         2720                         1990
## ENSG00000000005.6                             1                            9
## ENSG00000000419.13                         1963                         1984
## ENSG00000000457.14                         1690                          608
## ENSG00000000460.17                          307                          161
## ENSG00000000938.13                         4064                        12548
##                    TCGA-55-8089-01A-11R-2241-07 TCGA-38-4626-01A-01R-1206-07
## ENSG00000000003.15                         3540                         2330
## ENSG00000000005.6                             1                            0
## ENSG00000000419.13                         2662                         2728
## ENSG00000000457.14                         1091                          965
## ENSG00000000460.17                          819                          318
## ENSG00000000938.13                         3082                        10286
##                    TCGA-44-5644-01A-21R-2039-07 TCGA-44-6144-11A-01R-1755-07
## ENSG00000000003.15                         1221                          651
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                         1129                          927
## ENSG00000000457.14                          545                          462
## ENSG00000000460.17                          318                           94
## ENSG00000000938.13                          196                         4866
##                    TCGA-91-6835-01A-11R-1858-07 TCGA-L4-A4E6-01A-11R-A24H-07
## ENSG00000000003.15                         3528                          785
## ENSG00000000005.6                             0                            3
## ENSG00000000419.13                         1092                          969
## ENSG00000000457.14                         1081                          574
## ENSG00000000460.17                          563                          136
## ENSG00000000938.13                         1724                         6791
##                    TCGA-05-4398-01A-01R-1206-07 TCGA-97-A4M6-01A-11R-A24X-07
## ENSG00000000003.15                         6052                         7372
## ENSG00000000005.6                             3                            4
## ENSG00000000419.13                         3726                         1315
## ENSG00000000457.14                         1336                         1351
## ENSG00000000460.17                         1243                          332
## ENSG00000000938.13                         3469                         1245
##                    TCGA-49-AAR9-01A-21R-A41B-07 TCGA-75-7027-01A-11R-1949-07
## ENSG00000000003.15                         2860                         6732
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                         1911                         2738
## ENSG00000000457.14                          815                         1133
## ENSG00000000460.17                          759                         1258
## ENSG00000000938.13                          106                          592
##                    TCGA-49-AARO-01A-12R-A41B-07 TCGA-69-8255-01A-11R-2287-07
## ENSG00000000003.15                         3647                         3857
## ENSG00000000005.6                             1                            0
## ENSG00000000419.13                         1123                         4734
## ENSG00000000457.14                          569                         1142
## ENSG00000000460.17                          255                          689
## ENSG00000000938.13                         1827                         1489
##                    TCGA-55-7816-01A-11R-2170-07 TCGA-86-A4D0-01A-11R-A24H-07
## ENSG00000000003.15                         1121                         3091
## ENSG00000000005.6                            25                            0
## ENSG00000000419.13                         1434                         1757
## ENSG00000000457.14                          809                          740
## ENSG00000000460.17                          236                          813
## ENSG00000000938.13                         4004                          309
##                    TCGA-NJ-A4YQ-01A-11R-A262-07 TCGA-78-8648-01A-11R-2403-07
## ENSG00000000003.15                         1569                          746
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                         1070                          691
## ENSG00000000457.14                          929                          300
## ENSG00000000460.17                          310                          142
## ENSG00000000938.13                         1076                         2646
##                    TCGA-L9-A7SV-01A-11R-A39D-07 TCGA-49-AAR3-01A-11R-A41B-07
## ENSG00000000003.15                         1694                         1073
## ENSG00000000005.6                             2                            1
## ENSG00000000419.13                         2119                          892
## ENSG00000000457.14                          761                          351
## ENSG00000000460.17                          385                          307
## ENSG00000000938.13                          451                          809
##                    TCGA-44-6778-11A-01R-1858-07 TCGA-44-6145-11A-01R-1858-07
## ENSG00000000003.15                         1028                          776
## ENSG00000000005.6                             3                            2
## ENSG00000000419.13                         1206                          992
## ENSG00000000457.14                          486                          307
## ENSG00000000460.17                          105                           67
## ENSG00000000938.13                         4093                         3583
##                    TCGA-91-6830-01A-11R-1949-07 TCGA-MP-A4T6-01A-32R-A262-07
## ENSG00000000003.15                         2295                         4736
## ENSG00000000005.6                             1                            2
## ENSG00000000419.13                         1113                         1460
## ENSG00000000457.14                          430                         1936
## ENSG00000000460.17                          222                          486
## ENSG00000000938.13                          654                         1481
##                    TCGA-91-8496-01A-11R-2403-07 TCGA-55-8085-01A-11R-2241-07
## ENSG00000000003.15                         4435                         3893
## ENSG00000000005.6                             1                            0
## ENSG00000000419.13                         1059                         1818
## ENSG00000000457.14                         1243                         1487
## ENSG00000000460.17                          209                          919
## ENSG00000000938.13                         1434                         1416
##                    TCGA-55-7281-01A-11R-2039-07 TCGA-05-5429-01A-01R-1628-07
## ENSG00000000003.15                         3595                         2184
## ENSG00000000005.6                             1                            0
## ENSG00000000419.13                         1163                         1549
## ENSG00000000457.14                          329                          855
## ENSG00000000460.17                          186                          298
## ENSG00000000938.13                         1872                          102
##                    TCGA-44-3919-01A-02R-1107-07 TCGA-75-7025-01A-12R-1949-07
## ENSG00000000003.15                         6709                         9390
## ENSG00000000005.6                             6                            5
## ENSG00000000419.13                         1668                         1698
## ENSG00000000457.14                          997                         1277
## ENSG00000000460.17                          541                          294
## ENSG00000000938.13                         1609                         1068
##                    TCGA-55-8514-01A-11R-2403-07 TCGA-78-7537-01A-11R-2066-07
## ENSG00000000003.15                         4916                         3190
## ENSG00000000005.6                             2                            2
## ENSG00000000419.13                          873                         1083
## ENSG00000000457.14                          699                         1131
## ENSG00000000460.17                          162                          226
## ENSG00000000938.13                          636                          535
##                    TCGA-69-A59K-01A-11R-A262-07 TCGA-95-8494-01A-11R-2326-07
## ENSG00000000003.15                         1338                         1260
## ENSG00000000005.6                             0                            2
## ENSG00000000419.13                         2217                         1139
## ENSG00000000457.14                         1101                          334
## ENSG00000000460.17                          620                          302
## ENSG00000000938.13                         1683                         1037
##                    TCGA-MN-A4N1-01A-11R-A24X-07 TCGA-MP-A4SV-01A-11R-A24X-07
## ENSG00000000003.15                         3495                         2405
## ENSG00000000005.6                             3                           48
## ENSG00000000419.13                         1625                         1770
## ENSG00000000457.14                          788                         1076
## ENSG00000000460.17                          440                          672
## ENSG00000000938.13                          480                         1053
##                    TCGA-50-5942-01A-21R-1755-07 TCGA-55-7903-01A-11R-2170-07
## ENSG00000000003.15                         2021                         2905
## ENSG00000000005.6                             0                          125
## ENSG00000000419.13                          833                         1930
## ENSG00000000457.14                          900                         1103
## ENSG00000000460.17                          133                          574
## ENSG00000000938.13                          237                          701
##                    TCGA-55-8208-01A-11R-2241-07 TCGA-05-4395-01A-01R-1206-07
## ENSG00000000003.15                         2319                         3131
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                         2237                         3193
## ENSG00000000457.14                          805                         1326
## ENSG00000000460.17                          762                          483
## ENSG00000000938.13                         3807                          580
##                    TCGA-49-AAR4-01A-12R-A41B-07 TCGA-75-5122-01A-01R-1755-07
## ENSG00000000003.15                         1887                         1806
## ENSG00000000005.6                             1                            1
## ENSG00000000419.13                          855                         1066
## ENSG00000000457.14                          517                          355
## ENSG00000000460.17                          320                          260
## ENSG00000000938.13                         1318                         2381
##                    TCGA-73-7499-01A-11R-2187-07 TCGA-NJ-A55R-01A-11R-A262-07
## ENSG00000000003.15                         6961                         3444
## ENSG00000000005.6                             0                            3
## ENSG00000000419.13                         3794                         2569
## ENSG00000000457.14                         1337                         1977
## ENSG00000000460.17                         1038                          545
## ENSG00000000938.13                         1538                          678
##                    TCGA-NJ-A4YG-01A-22R-A262-07 TCGA-53-A4EZ-01A-12R-A24X-07
## ENSG00000000003.15                         1678                         4627
## ENSG00000000005.6                             7                            0
## ENSG00000000419.13                          987                         1318
## ENSG00000000457.14                          755                          691
## ENSG00000000460.17                          219                          354
## ENSG00000000938.13                         1202                          785
##                    TCGA-78-7156-01A-11R-2039-07 TCGA-55-6970-01A-11R-1949-07
## ENSG00000000003.15                         2296                         4026
## ENSG00000000005.6                             2                            0
## ENSG00000000419.13                          951                         2021
## ENSG00000000457.14                         1196                         1398
## ENSG00000000460.17                          141                          863
## ENSG00000000938.13                          230                         1090
##                    TCGA-NJ-A55O-01A-11R-A262-07 TCGA-99-8025-01A-11R-2241-07
## ENSG00000000003.15                          858                         3868
## ENSG00000000005.6                             0                            3
## ENSG00000000419.13                         1048                         1718
## ENSG00000000457.14                          884                         1126
## ENSG00000000460.17                          234                          450
## ENSG00000000938.13                         1767                          604
##                    TCGA-50-6594-01A-11R-1755-07 TCGA-99-7458-01A-11R-2039-07
## ENSG00000000003.15                         3825                         3352
## ENSG00000000005.6                             0                           11
## ENSG00000000419.13                         1177                         1447
## ENSG00000000457.14                          400                         1294
## ENSG00000000460.17                          449                          310
## ENSG00000000938.13                          367                         1325
##                    TCGA-91-6828-01A-11R-1858-07 TCGA-MP-A4TJ-01A-51R-A262-07
## ENSG00000000003.15                         1472                         1460
## ENSG00000000005.6                             0                            1
## ENSG00000000419.13                          545                          929
## ENSG00000000457.14                          558                          455
## ENSG00000000460.17                          168                          226
## ENSG00000000938.13                          702                         1326
##                    TCGA-91-6828-11A-01R-1858-07 TCGA-99-8028-01A-11R-2241-07
## ENSG00000000003.15                         1453                         2134
## ENSG00000000005.6                             2                            3
## ENSG00000000419.13                          937                         1890
## ENSG00000000457.14                          354                          511
## ENSG00000000460.17                           91                          163
## ENSG00000000938.13                         2558                         2963
##                    TCGA-91-6849-11A-01R-1949-07 TCGA-64-5778-01A-01R-1628-07
## ENSG00000000003.15                         1304                         5440
## ENSG00000000005.6                             1                            0
## ENSG00000000419.13                          791                         2397
## ENSG00000000457.14                          375                          756
## ENSG00000000460.17                           57                          530
## ENSG00000000938.13                         1842                          870
##                    TCGA-91-6847-01A-11R-1949-07 TCGA-91-6847-11A-01R-1949-07
## ENSG00000000003.15                        12232                         1275
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                         4063                          690
## ENSG00000000457.14                         1599                          440
## ENSG00000000460.17                         1161                           57
## ENSG00000000938.13                          139                         1439
##                    TCGA-49-AARQ-01A-11R-A41B-07 TCGA-55-8205-01A-11R-2241-07
## ENSG00000000003.15                         2405                         2157
## ENSG00000000005.6                             1                            0
## ENSG00000000419.13                         1434                         2330
## ENSG00000000457.14                          590                          631
## ENSG00000000460.17                          335                          567
## ENSG00000000938.13                          816                         3184
##                    TCGA-49-AARN-01A-21R-A41B-07 TCGA-86-A4P7-01A-11R-A24X-07
## ENSG00000000003.15                         1779                         1179
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                          911                         1243
## ENSG00000000457.14                          565                          661
## ENSG00000000460.17                          204                          229
## ENSG00000000938.13                          263                         1866
##                    TCGA-MP-A4TC-01A-11R-A24X-07 TCGA-55-6975-11A-01R-1949-07
## ENSG00000000003.15                         2477                          510
## ENSG00000000005.6                             0                            2
## ENSG00000000419.13                         1325                          660
## ENSG00000000457.14                          587                          346
## ENSG00000000460.17                          363                           65
## ENSG00000000938.13                          942                         1310
##                    TCGA-50-8459-01A-11R-2326-07 TCGA-55-6975-01A-11R-1949-07
## ENSG00000000003.15                         1799                         1577
## ENSG00000000005.6                             1                            0
## ENSG00000000419.13                          926                          829
## ENSG00000000457.14                          421                          352
## ENSG00000000460.17                           96                          307
## ENSG00000000938.13                         2660                          113
##                    TCGA-95-7039-01A-11R-1949-07 TCGA-44-7659-01A-11R-2066-07
## ENSG00000000003.15                         1622                          945
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                         1300                          901
## ENSG00000000457.14                          341                          709
## ENSG00000000460.17                          235                          127
## ENSG00000000938.13                          281                          489
##                    TCGA-86-8673-01A-11R-2403-07 TCGA-49-4487-01A-21R-1858-07
## ENSG00000000003.15                         1759                         1703
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                          963                         1635
## ENSG00000000457.14                          227                          380
## ENSG00000000460.17                          229                          479
## ENSG00000000938.13                          532                          906
##                    TCGA-78-7153-01A-11R-2039-07 TCGA-44-A4SU-01A-11R-A24X-07
## ENSG00000000003.15                         5479                         8804
## ENSG00000000005.6                             3                            2
## ENSG00000000419.13                         1015                         1445
## ENSG00000000457.14                          549                         1075
## ENSG00000000460.17                          204                          327
## ENSG00000000938.13                          196                          427
##                    TCGA-44-2668-01A-01R-0946-07 TCGA-49-4494-01A-01R-1206-07
## ENSG00000000003.15                         3345                         9092
## ENSG00000000005.6                             1                            4
## ENSG00000000419.13                         3374                         2861
## ENSG00000000457.14                         1037                          743
## ENSG00000000460.17                          806                          283
## ENSG00000000938.13                         3583                         1023
##                    TCGA-78-7154-01A-11R-2039-07 TCGA-49-AARE-01A-11R-A41B-07
## ENSG00000000003.15                         4092                         5209
## ENSG00000000005.6                             0                            2
## ENSG00000000419.13                         3568                         1229
## ENSG00000000457.14                          622                          666
## ENSG00000000460.17                          508                          358
## ENSG00000000938.13                          302                          909
##                    TCGA-95-A4VK-01A-11R-A262-07 TCGA-99-AA5R-01A-11R-A39D-07
## ENSG00000000003.15                         2261                         1428
## ENSG00000000005.6                             1                            1
## ENSG00000000419.13                          815                         1011
## ENSG00000000457.14                         1190                          695
## ENSG00000000460.17                          285                          151
## ENSG00000000938.13                          599                         3410
##                    TCGA-55-6969-11A-01R-1949-07 TCGA-97-7938-01A-11R-2170-07
## ENSG00000000003.15                          513                         2015
## ENSG00000000005.6                             1                           15
## ENSG00000000419.13                          754                         1215
## ENSG00000000457.14                          350                          693
## ENSG00000000460.17                           73                          212
## ENSG00000000938.13                         2077                          417
##                    TCGA-J2-A4AE-01A-21R-A24H-07 TCGA-93-7347-01A-11R-2187-07
## ENSG00000000003.15                         2547                         1381
## ENSG00000000005.6                             0                            5
## ENSG00000000419.13                         1365                          998
## ENSG00000000457.14                          612                          715
## ENSG00000000460.17                          184                          195
## ENSG00000000938.13                         1579                         1337
##                    TCGA-62-A470-01A-11R-A24H-07 TCGA-50-5936-11A-01R-1628-07
## ENSG00000000003.15                          775                          721
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                          583                          767
## ENSG00000000457.14                          355                          465
## ENSG00000000460.17                          141                           75
## ENSG00000000938.13                          404                         3573
##                    TCGA-78-7148-01A-11R-2039-07 TCGA-35-3615-01A-01R-0946-07
## ENSG00000000003.15                         3257                         4238
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                         1092                         2382
## ENSG00000000457.14                          688                         1372
## ENSG00000000460.17                          262                          363
## ENSG00000000938.13                          362                          684
##                    TCGA-MP-A4TK-01A-11R-A24X-07 TCGA-97-8175-01A-11R-2287-07
## ENSG00000000003.15                         2170                         6145
## ENSG00000000005.6                            21                            2
## ENSG00000000419.13                         1380                         2342
## ENSG00000000457.14                          655                          761
## ENSG00000000460.17                          367                          451
## ENSG00000000938.13                         1714                         1512
##                    TCGA-62-A46V-01A-11R-A24H-07 TCGA-05-4250-01A-01R-1107-07
## ENSG00000000003.15                         2604                         5316
## ENSG00000000005.6                             1                            5
## ENSG00000000419.13                         1848                         2886
## ENSG00000000457.14                          914                          631
## ENSG00000000460.17                          511                          716
## ENSG00000000938.13                          421                         1468
##                    TCGA-44-2665-01A-01R-A278-07 TCGA-44-2665-01B-06R-A277-07
## ENSG00000000003.15                         4000                          401
## ENSG00000000005.6                          1254                           73
## ENSG00000000419.13                         2216                          154
## ENSG00000000457.14                         1726                          221
## ENSG00000000460.17                          494                          102
## ENSG00000000938.13                          301                           79
##                    TCGA-50-5044-01A-21R-1858-07 TCGA-64-1678-01A-01R-0946-07
## ENSG00000000003.15                          936                         3861
## ENSG00000000005.6                             0                            0
## ENSG00000000419.13                          705                         3149
## ENSG00000000457.14                          175                          419
## ENSG00000000460.17                          164                          502
## ENSG00000000938.13                          332                           69
##                    TCGA-78-7155-01A-11R-2039-07 TCGA-78-7220-01A-11R-2039-07
## ENSG00000000003.15                         3875                         2906
## ENSG00000000005.6                             8                            0
## ENSG00000000419.13                         1171                         2724
## ENSG00000000457.14                          421                          909
## ENSG00000000460.17                          814                          760
## ENSG00000000938.13                          162                          286
##                    TCGA-80-5611-01A-01R-1628-07 TCGA-93-8067-01A-11R-2287-07
## ENSG00000000003.15                         3254                         3434
## ENSG00000000005.6                             1                            0
## ENSG00000000419.13                         1781                         2676
## ENSG00000000457.14                          725                         1269
## ENSG00000000460.17                          556                          765
## ENSG00000000938.13                         1467                          801
# Gene-level annotation: take the row metadata attached to `data` and convert
# it to a plain data.frame so it can be inspected and subset with base tools.
gene_info <- data |>
  rowData() |>
  as.data.frame()
# Show the leading six rows to check which annotation columns are available.
head(gene_info, n = 6L)
##                    source type score phase            gene_id      gene_type
## ENSG00000000003.15 HAVANA gene    NA    NA ENSG00000000003.15 protein_coding
## ENSG00000000005.6  HAVANA gene    NA    NA  ENSG00000000005.6 protein_coding
## ENSG00000000419.13 HAVANA gene    NA    NA ENSG00000000419.13 protein_coding
## ENSG00000000457.14 HAVANA gene    NA    NA ENSG00000000457.14 protein_coding
## ENSG00000000460.17 HAVANA gene    NA    NA ENSG00000000460.17 protein_coding
## ENSG00000000938.13 HAVANA gene    NA    NA ENSG00000000938.13 protein_coding
##                    gene_name level    hgnc_id          havana_gene
## ENSG00000000003.15    TSPAN6     2 HGNC:11858 OTTHUMG00000022002.2
## ENSG00000000005.6       TNMD     2 HGNC:17757 OTTHUMG00000022001.2
## ENSG00000000419.13      DPM1     2  HGNC:3005 OTTHUMG00000032742.2
## ENSG00000000457.14     SCYL3     2 HGNC:19285 OTTHUMG00000035941.6
## ENSG00000000460.17  C1orf112     2 HGNC:25565 OTTHUMG00000035821.9
## ENSG00000000938.13       FGR     2  HGNC:3697 OTTHUMG00000003516.3
# Sample-level metadata: take the column metadata attached to `data` and
# convert it to a plain data.frame (one row per sample barcode).
sample_info <- data |>
  colData() |>
  as.data.frame()
# Show the leading six rows to check which metadata fields are available.
head(sample_info, n = 6L)
##                                                   barcode      patient
## TCGA-73-4658-01A-01R-1755-07 TCGA-73-4658-01A-01R-1755-07 TCGA-73-4658
## TCGA-44-2661-11A-01R-1758-07 TCGA-44-2661-11A-01R-1758-07 TCGA-44-2661
## TCGA-55-6986-11A-01R-1949-07 TCGA-55-6986-11A-01R-1949-07 TCGA-55-6986
## TCGA-55-8615-01A-11R-2403-07 TCGA-55-8615-01A-11R-2403-07 TCGA-55-8615
## TCGA-97-8177-01A-11R-2287-07 TCGA-97-8177-01A-11R-2287-07 TCGA-97-8177
## TCGA-49-6744-11A-01R-1858-07 TCGA-49-6744-11A-01R-1858-07 TCGA-49-6744
##                                        sample shortLetterCode
## TCGA-73-4658-01A-01R-1755-07 TCGA-73-4658-01A              TP
## TCGA-44-2661-11A-01R-1758-07 TCGA-44-2661-11A              NT
## TCGA-55-6986-11A-01R-1949-07 TCGA-55-6986-11A              NT
## TCGA-55-8615-01A-11R-2403-07 TCGA-55-8615-01A              TP
## TCGA-97-8177-01A-11R-2287-07 TCGA-97-8177-01A              TP
## TCGA-49-6744-11A-01R-1858-07 TCGA-49-6744-11A              NT
##                                       definition sample_submitter_id
## TCGA-73-4658-01A-01R-1755-07 Primary solid Tumor    TCGA-73-4658-01A
## TCGA-44-2661-11A-01R-1758-07 Solid Tissue Normal    TCGA-44-2661-11A
## TCGA-55-6986-11A-01R-1949-07 Solid Tissue Normal    TCGA-55-6986-11A
## TCGA-55-8615-01A-11R-2403-07 Primary solid Tumor    TCGA-55-8615-01A
## TCGA-97-8177-01A-11R-2287-07 Primary solid Tumor    TCGA-97-8177-01A
## TCGA-49-6744-11A-01R-1858-07 Solid Tissue Normal    TCGA-49-6744-11A
##                              sample_type_id tumor_descriptor
## TCGA-73-4658-01A-01R-1755-07             01          Primary
## TCGA-44-2661-11A-01R-1758-07             11   Not Applicable
## TCGA-55-6986-11A-01R-1949-07             11   Not Applicable
## TCGA-55-8615-01A-11R-2403-07             01          Primary
## TCGA-97-8177-01A-11R-2287-07             01          Primary
## TCGA-49-6744-11A-01R-1858-07             11   Not Applicable
##                                                         sample_id submitter_id
## TCGA-73-4658-01A-01R-1755-07 bfde37f2-ab6f-4426-a33d-ef9d21772f02 TCGA-73-4658
## TCGA-44-2661-11A-01R-1758-07 2a7da235-f069-4efa-ac35-649520b4dbb3 TCGA-44-2661
## TCGA-55-6986-11A-01R-1949-07 e8e0e53a-4009-4141-ba6f-eed35fcaad7b TCGA-55-6986
## TCGA-55-8615-01A-11R-2403-07 7873c1a9-b1ea-46cc-b787-dfbad3366494 TCGA-55-8615
## TCGA-97-8177-01A-11R-2287-07 5711ce53-0bac-4a5d-b7ba-4e1bc0c56924 TCGA-97-8177
## TCGA-49-6744-11A-01R-1858-07 e3962e0e-eafa-4016-8fa4-1c4ba0d2a9ad TCGA-49-6744
##                                      sample_type oct_embedded specimen_type
## TCGA-73-4658-01A-01R-1755-07       Primary Tumor         <NA>  Solid Tissue
## TCGA-44-2661-11A-01R-1758-07 Solid Tissue Normal         <NA>  Solid Tissue
## TCGA-55-6986-11A-01R-1949-07 Solid Tissue Normal         <NA>  Solid Tissue
## TCGA-55-8615-01A-11R-2403-07       Primary Tumor         <NA>  Solid Tissue
## TCGA-97-8177-01A-11R-2287-07       Primary Tumor         <NA>  Solid Tissue
## TCGA-49-6744-11A-01R-1858-07 Solid Tissue Normal         <NA>  Solid Tissue
##                                 state is_ffpe tissue_type preservation_method
## TCGA-73-4658-01A-01R-1755-07 released   FALSE       Tumor             Unknown
## TCGA-44-2661-11A-01R-1758-07 released   FALSE      Normal             Unknown
## TCGA-55-6986-11A-01R-1949-07 released   FALSE      Normal             Unknown
## TCGA-55-8615-01A-11R-2403-07 released   FALSE       Tumor             Unknown
## TCGA-97-8177-01A-11R-2287-07 released   FALSE       Tumor             Unknown
## TCGA-49-6744-11A-01R-1858-07 released   FALSE      Normal             Unknown
##                               composition days_to_collection initial_weight
## TCGA-73-4658-01A-01R-1755-07 Not Reported                 NA             NA
## TCGA-44-2661-11A-01R-1758-07 Not Reported                 NA             NA
## TCGA-55-6986-11A-01R-1949-07 Not Reported                 NA             NA
## TCGA-55-8615-01A-11R-2403-07 Not Reported                 NA             NA
## TCGA-97-8177-01A-11R-2287-07 Not Reported                 NA             NA
## TCGA-49-6744-11A-01R-1858-07 Not Reported                 NA             NA
##                              intermediate_dimension
## TCGA-73-4658-01A-01R-1755-07                    1.0
## TCGA-44-2661-11A-01R-1758-07                    0.9
## TCGA-55-6986-11A-01R-1949-07                    0.6
## TCGA-55-8615-01A-11R-2403-07                    0.9
## TCGA-97-8177-01A-11R-2287-07                    1.7
## TCGA-49-6744-11A-01R-1858-07                    0.9
##                                             pathology_report_uuid
## TCGA-73-4658-01A-01R-1755-07 37bb6a7b-4f9e-4690-b904-4eebc3189562
## TCGA-44-2661-11A-01R-1758-07                                 <NA>
## TCGA-55-6986-11A-01R-1949-07                                 <NA>
## TCGA-55-8615-01A-11R-2403-07 37b4238a-1aea-4670-8c43-b24525b94cdd
## TCGA-97-8177-01A-11R-2287-07 addbe4e2-114d-4e38-aa21-c3e9695da8fa
## TCGA-49-6744-11A-01R-1858-07                                 <NA>
##                              shortest_dimension longest_dimension
## TCGA-73-4658-01A-01R-1755-07                0.3               1.4
## TCGA-44-2661-11A-01R-1758-07                0.4               1.3
## TCGA-55-6986-11A-01R-1949-07                0.5               0.7
## TCGA-55-8615-01A-11R-2403-07                0.6               0.9
## TCGA-97-8177-01A-11R-2287-07                0.3               2.4
## TCGA-49-6744-11A-01R-1858-07                0.4               1.0
##                              synchronous_malignancy ajcc_pathologic_stage
## TCGA-73-4658-01A-01R-1755-07           Not Reported              Stage IB
## TCGA-44-2661-11A-01R-1758-07           Not Reported              Stage IA
## TCGA-55-6986-11A-01R-1949-07                     No              Stage IB
## TCGA-55-8615-01A-11R-2403-07                     No            Stage IIIA
## TCGA-97-8177-01A-11R-2287-07                     No              Stage IB
## TCGA-49-6744-11A-01R-1858-07                     No             Stage IIA
##                              days_to_diagnosis   treatments
## TCGA-73-4658-01A-01R-1755-07                 0 c(NA, NA....
## TCGA-44-2661-11A-01R-1758-07                 0 c(NA, NA....
## TCGA-55-6986-11A-01R-1949-07                 0 c(NA, NA....
## TCGA-55-8615-01A-11R-2403-07                 0 c(NA, NA....
## TCGA-97-8177-01A-11R-2287-07                 0 c(NA, NA....
## TCGA-49-6744-11A-01R-1858-07                 0 c(NA, NA....
##                              last_known_disease_status
## TCGA-73-4658-01A-01R-1755-07              not reported
## TCGA-44-2661-11A-01R-1758-07              not reported
## TCGA-55-6986-11A-01R-1949-07              not reported
## TCGA-55-8615-01A-11R-2403-07              not reported
## TCGA-97-8177-01A-11R-2287-07              not reported
## TCGA-49-6744-11A-01R-1858-07              not reported
##                              tissue_or_organ_of_origin days_to_last_follow_up
## TCGA-73-4658-01A-01R-1755-07          Lower lobe, lung                   1600
## TCGA-44-2661-11A-01R-1758-07          Upper lobe, lung                   1159
## TCGA-55-6986-11A-01R-1949-07          Lower lobe, lung                   3261
## TCGA-55-8615-01A-11R-2403-07         Middle lobe, lung                    446
## TCGA-97-8177-01A-11R-2287-07          Lower lobe, lung                    499
## TCGA-49-6744-11A-01R-1858-07          Upper lobe, lung                   1683
##                              age_at_diagnosis
## TCGA-73-4658-01A-01R-1755-07            29508
## TCGA-44-2661-11A-01R-1758-07            25313
## TCGA-55-6986-11A-01R-1949-07               NA
## TCGA-55-8615-01A-11R-2403-07            24786
## TCGA-97-8177-01A-11R-2287-07            21648
## TCGA-49-6744-11A-01R-1858-07            23484
##                                                        primary_diagnosis
## TCGA-73-4658-01A-01R-1755-07                         Adenocarcinoma, NOS
## TCGA-44-2661-11A-01R-1758-07                         Adenocarcinoma, NOS
## TCGA-55-6986-11A-01R-1949-07 Bronchiolo-alveolar carcinoma, non-mucinous
## TCGA-55-8615-01A-11R-2403-07                         Adenocarcinoma, NOS
## TCGA-97-8177-01A-11R-2287-07          Adenocarcinoma with mixed subtypes
## TCGA-49-6744-11A-01R-1858-07          Adenocarcinoma with mixed subtypes
##                              prior_malignancy year_of_diagnosis prior_treatment
## TCGA-73-4658-01A-01R-1755-07              yes              2004              No
## TCGA-44-2661-11A-01R-1758-07              yes              2009              No
## TCGA-55-6986-11A-01R-1949-07               no              2004              No
## TCGA-55-8615-01A-11R-2403-07               no              2012              No
## TCGA-97-8177-01A-11R-2287-07               no              2012              No
## TCGA-49-6744-11A-01R-1858-07               no              2010              No
##                              ajcc_staging_system_edition ajcc_pathologic_t
## TCGA-73-4658-01A-01R-1755-07                         6th                T2
## TCGA-44-2661-11A-01R-1758-07                         6th                T1
## TCGA-55-6986-11A-01R-1949-07                         6th                T2
## TCGA-55-8615-01A-11R-2403-07                         7th                T3
## TCGA-97-8177-01A-11R-2287-07                         7th               T2a
## TCGA-49-6744-11A-01R-1858-07                         7th               T2a
##                              morphology ajcc_pathologic_n ajcc_pathologic_m
## TCGA-73-4658-01A-01R-1755-07     8140/3                N0                M0
## TCGA-44-2661-11A-01R-1758-07     8140/3                N0                M0
## TCGA-55-6986-11A-01R-1949-07     8252/3                N0                M0
## TCGA-55-8615-01A-11R-2403-07     8140/3                N2                MX
## TCGA-97-8177-01A-11R-2287-07     8255/3                N0                M0
## TCGA-49-6744-11A-01R-1858-07     8255/3                N1                MX
##                              classification_of_tumor
## TCGA-73-4658-01A-01R-1755-07            not reported
## TCGA-44-2661-11A-01R-1758-07            not reported
## TCGA-55-6986-11A-01R-1949-07            not reported
## TCGA-55-8615-01A-11R-2403-07            not reported
## TCGA-97-8177-01A-11R-2287-07            not reported
## TCGA-49-6744-11A-01R-1858-07            not reported
##                                                      diagnosis_id icd_10_code
## TCGA-73-4658-01A-01R-1755-07 6e678430-a27c-5412-b531-49b344cadb05       C34.3
## TCGA-44-2661-11A-01R-1758-07 62584f05-9d54-5926-8d4e-2e3787dd6508       C34.1
## TCGA-55-6986-11A-01R-1949-07 8f6f13a3-c7f2-5028-b188-909c5cc21afe       C34.3
## TCGA-55-8615-01A-11R-2403-07 6dbc0170-58ff-560a-800e-781837e97b76       C34.2
## TCGA-97-8177-01A-11R-2287-07 06391f78-eb30-5803-87b4-0a0c5a555399       C34.3
## TCGA-49-6744-11A-01R-1858-07 4a84a8c9-ca7d-5349-b914-fc25cac0c101       C34.1
##                              site_of_resection_or_biopsy  tumor_grade
## TCGA-73-4658-01A-01R-1755-07            Lower lobe, lung Not Reported
## TCGA-44-2661-11A-01R-1758-07            Upper lobe, lung Not Reported
## TCGA-55-6986-11A-01R-1949-07            Lower lobe, lung Not Reported
## TCGA-55-8615-01A-11R-2403-07           Middle lobe, lung Not Reported
## TCGA-97-8177-01A-11R-2287-07            Lower lobe, lung Not Reported
## TCGA-49-6744-11A-01R-1858-07            Upper lobe, lung Not Reported
##                              progression_or_recurrence cigarettes_per_day
## TCGA-73-4658-01A-01R-1755-07              not reported           1.369863
## TCGA-44-2661-11A-01R-1758-07              not reported                 NA
## TCGA-55-6986-11A-01R-1949-07              not reported                 NA
## TCGA-55-8615-01A-11R-2403-07              not reported           3.671233
## TCGA-97-8177-01A-11R-2287-07              not reported                 NA
## TCGA-49-6744-11A-01R-1858-07              not reported           1.095890
##                              alcohol_history
## TCGA-73-4658-01A-01R-1755-07    Not Reported
## TCGA-44-2661-11A-01R-1758-07    Not Reported
## TCGA-55-6986-11A-01R-1949-07    Not Reported
## TCGA-55-8615-01A-11R-2403-07    Not Reported
## TCGA-97-8177-01A-11R-2287-07    Not Reported
## TCGA-49-6744-11A-01R-1858-07    Not Reported
##                                                       exposure_id years_smoked
## TCGA-73-4658-01A-01R-1755-07 80ac17a4-ed8b-5c3b-a85e-72dfab59fda8           NA
## TCGA-44-2661-11A-01R-1758-07 ccbe3e80-fbcc-51a2-a70f-ec92252879cc           NA
## TCGA-55-6986-11A-01R-1949-07 2d802058-cd51-5d81-b522-97cd56503224           NA
## TCGA-55-8615-01A-11R-2403-07 4f7281f1-2b5e-5885-b53d-6306a02d5f19           NA
## TCGA-97-8177-01A-11R-2287-07 e30f73bb-50af-5e71-870b-51379f82ceca           NA
## TCGA-49-6744-11A-01R-1858-07 5e2340a9-165e-541d-b36f-01ae61b4d497           NA
##                              pack_years_smoked  race gender
## TCGA-73-4658-01A-01R-1755-07                25 white female
## TCGA-44-2661-11A-01R-1758-07                NA white female
## TCGA-55-6986-11A-01R-1949-07                NA white female
## TCGA-55-8615-01A-11R-2403-07                67 white   male
## TCGA-97-8177-01A-11R-2287-07                NA white female
## TCGA-49-6744-11A-01R-1858-07                20 white female
##                                           ethnicity vital_status age_at_index
## TCGA-73-4658-01A-01R-1755-07 not hispanic or latino         Dead           80
## TCGA-44-2661-11A-01R-1758-07 not hispanic or latino        Alive           69
## TCGA-55-6986-11A-01R-1949-07           not reported        Alive           74
## TCGA-55-8615-01A-11R-2403-07 not hispanic or latino        Alive           67
## TCGA-97-8177-01A-11R-2287-07 not hispanic or latino        Alive           59
## TCGA-49-6744-11A-01R-1858-07           not reported        Alive           64
##                              days_to_birth year_of_birth
## TCGA-73-4658-01A-01R-1755-07        -29508          1924
## TCGA-44-2661-11A-01R-1758-07        -25313          1940
## TCGA-55-6986-11A-01R-1949-07            NA          1930
## TCGA-55-8615-01A-11R-2403-07        -24786          1945
## TCGA-97-8177-01A-11R-2287-07        -21648          1953
## TCGA-49-6744-11A-01R-1858-07        -23484          1946
##                                                    demographic_id days_to_death
## TCGA-73-4658-01A-01R-1755-07 21d19606-f883-5be3-adbb-20b41c95627b          1600
## TCGA-44-2661-11A-01R-1758-07 58e2c036-94cc-5ebf-a3e3-1e0e28182c8d            NA
## TCGA-55-6986-11A-01R-1949-07 da8cdcae-beae-5745-b739-df6f304f4973            NA
## TCGA-55-8615-01A-11R-2403-07 0948c018-0a80-5822-b435-758d4ba23af5            NA
## TCGA-97-8177-01A-11R-2287-07 7fcd8882-f338-5643-a7a0-184902014715            NA
## TCGA-49-6744-11A-01R-1858-07 d0e66cc5-32bd-5a01-8057-4ac557d5f5a9            NA
##                              year_of_death bcr_patient_barcode primary_site
## TCGA-73-4658-01A-01R-1755-07          2008    TCGA-73-4658-01A Bronchus....
## TCGA-44-2661-11A-01R-1758-07            NA    TCGA-44-2661-11A Bronchus....
## TCGA-55-6986-11A-01R-1949-07            NA    TCGA-55-6986-11A Bronchus....
## TCGA-55-8615-01A-11R-2403-07            NA    TCGA-55-8615-01A Bronchus....
## TCGA-97-8177-01A-11R-2287-07            NA    TCGA-97-8177-01A Bronchus....
## TCGA-49-6744-11A-01R-1858-07            NA    TCGA-49-6744-11A Bronchus....
##                              project_id disease_type                name
## TCGA-73-4658-01A-01R-1755-07  TCGA-LUAD Cystic, .... Lung Adenocarcinoma
## TCGA-44-2661-11A-01R-1758-07  TCGA-LUAD Cystic, .... Lung Adenocarcinoma
## TCGA-55-6986-11A-01R-1949-07  TCGA-LUAD Cystic, .... Lung Adenocarcinoma
## TCGA-55-8615-01A-11R-2403-07  TCGA-LUAD Cystic, .... Lung Adenocarcinoma
## TCGA-97-8177-01A-11R-2287-07  TCGA-LUAD Cystic, .... Lung Adenocarcinoma
## TCGA-49-6744-11A-01R-1858-07  TCGA-LUAD Cystic, .... Lung Adenocarcinoma
##                              releasable released paper_patient paper_Sex
## TCGA-73-4658-01A-01R-1755-07       TRUE     TRUE  TCGA-73-4658    FEMALE
## TCGA-44-2661-11A-01R-1758-07       TRUE     TRUE          <NA>      <NA>
## TCGA-55-6986-11A-01R-1949-07       TRUE     TRUE          <NA>      <NA>
## TCGA-55-8615-01A-11R-2403-07       TRUE     TRUE          <NA>      <NA>
## TCGA-97-8177-01A-11R-2287-07       TRUE     TRUE          <NA>      <NA>
## TCGA-49-6744-11A-01R-1858-07       TRUE     TRUE          <NA>      <NA>
##                              paper_Age.at.diagnosis paper_T.stage paper_N.stage
## TCGA-73-4658-01A-01R-1755-07                     80            T2            N0
## TCGA-44-2661-11A-01R-1758-07                   <NA>          <NA>          <NA>
## TCGA-55-6986-11A-01R-1949-07                   <NA>          <NA>          <NA>
## TCGA-55-8615-01A-11R-2403-07                   <NA>          <NA>          <NA>
## TCGA-97-8177-01A-11R-2287-07                   <NA>          <NA>          <NA>
## TCGA-49-6744-11A-01R-1858-07                   <NA>          <NA>          <NA>
##                              paper_Tumor.stage
## TCGA-73-4658-01A-01R-1755-07          Stage IB
## TCGA-44-2661-11A-01R-1758-07              <NA>
## TCGA-55-6986-11A-01R-1949-07              <NA>
## TCGA-55-8615-01A-11R-2403-07              <NA>
## TCGA-97-8177-01A-11R-2287-07              <NA>
## TCGA-49-6744-11A-01R-1858-07              <NA>
##                                                paper_Smoking.Status
## TCGA-73-4658-01A-01R-1755-07 Current reformed smoker for > 15 years
## TCGA-44-2661-11A-01R-1758-07                                   <NA>
## TCGA-55-6986-11A-01R-1949-07                                   <NA>
## TCGA-55-8615-01A-11R-2403-07                                   <NA>
## TCGA-97-8177-01A-11R-2287-07                                   <NA>
## TCGA-49-6744-11A-01R-1858-07                                   <NA>
##                              paper_Survival paper_Transversion.High.Low
## TCGA-73-4658-01A-01R-1755-07       DECEASED                        High
## TCGA-44-2661-11A-01R-1758-07           <NA>                        <NA>
## TCGA-55-6986-11A-01R-1949-07           <NA>                        <NA>
## TCGA-55-8615-01A-11R-2403-07           <NA>                        <NA>
## TCGA-97-8177-01A-11R-2287-07           <NA>                        <NA>
## TCGA-49-6744-11A-01R-1858-07           <NA>                        <NA>
##                              paper_Nonsilent.Mutations
## TCGA-73-4658-01A-01R-1755-07                       277
## TCGA-44-2661-11A-01R-1758-07                        NA
## TCGA-55-6986-11A-01R-1949-07                        NA
## TCGA-55-8615-01A-11R-2403-07                        NA
## TCGA-97-8177-01A-11R-2287-07                        NA
## TCGA-49-6744-11A-01R-1858-07                        NA
##                              paper_Nonsilent.Mutations.per.Mb
## TCGA-73-4658-01A-01R-1755-07                             7,95
## TCGA-44-2661-11A-01R-1758-07                             <NA>
## TCGA-55-6986-11A-01R-1949-07                             <NA>
## TCGA-55-8615-01A-11R-2403-07                             <NA>
## TCGA-97-8177-01A-11R-2287-07                             <NA>
## TCGA-49-6744-11A-01R-1858-07                             <NA>
##                              paper_Oncogene.Negative.or.Positive.Groups
## TCGA-73-4658-01A-01R-1755-07                          Oncogene Negative
## TCGA-44-2661-11A-01R-1758-07                                       <NA>
## TCGA-55-6986-11A-01R-1949-07                                       <NA>
## TCGA-55-8615-01A-11R-2403-07                                       <NA>
## TCGA-97-8177-01A-11R-2287-07                                       <NA>
## TCGA-49-6744-11A-01R-1858-07                                       <NA>
##                              paper_Fusions paper_expression_subtype
## TCGA-73-4658-01A-01R-1755-07                           prox.-inflam
## TCGA-44-2661-11A-01R-1758-07          <NA>                     <NA>
## TCGA-55-6986-11A-01R-1949-07          <NA>                     <NA>
## TCGA-55-8615-01A-11R-2403-07          <NA>                     <NA>
## TCGA-97-8177-01A-11R-2287-07          <NA>                     <NA>
## TCGA-49-6744-11A-01R-1858-07          <NA>                     <NA>
##                              paper_chromosome.affected.by.chromothripsis
## TCGA-73-4658-01A-01R-1755-07                                        <NA>
## TCGA-44-2661-11A-01R-1758-07                                        <NA>
## TCGA-55-6986-11A-01R-1949-07                                        <NA>
## TCGA-55-8615-01A-11R-2403-07                                        <NA>
## TCGA-97-8177-01A-11R-2287-07                                        <NA>
## TCGA-49-6744-11A-01R-1858-07                                        <NA>
##                              paper_iCluster.Group
## TCGA-73-4658-01A-01R-1755-07                    4
## TCGA-44-2661-11A-01R-1758-07                   NA
## TCGA-55-6986-11A-01R-1949-07                   NA
## TCGA-55-8615-01A-11R-2403-07                   NA
## TCGA-97-8177-01A-11R-2287-07                   NA
## TCGA-49-6744-11A-01R-1858-07                   NA
##                              paper_CIMP.methylation.signature.
## TCGA-73-4658-01A-01R-1755-07                             high 
## TCGA-44-2661-11A-01R-1758-07                              <NA>
## TCGA-55-6986-11A-01R-1949-07                              <NA>
## TCGA-55-8615-01A-11R-2403-07                              <NA>
## TCGA-97-8177-01A-11R-2287-07                              <NA>
## TCGA-49-6744-11A-01R-1858-07                              <NA>
##                              paper_MTOR.mechanism.of.mTOR.pathway.activation
## TCGA-73-4658-01A-01R-1755-07                                       unaligned
## TCGA-44-2661-11A-01R-1758-07                                            <NA>
## TCGA-55-6986-11A-01R-1949-07                                            <NA>
## TCGA-55-8615-01A-11R-2403-07                                            <NA>
## TCGA-97-8177-01A-11R-2287-07                                            <NA>
## TCGA-49-6744-11A-01R-1858-07                                            <NA>
##                              paper_Ploidy.ABSOLUTE.calls
## TCGA-73-4658-01A-01R-1755-07                        1,96
## TCGA-44-2661-11A-01R-1758-07                        <NA>
## TCGA-55-6986-11A-01R-1949-07                        <NA>
## TCGA-55-8615-01A-11R-2403-07                        <NA>
## TCGA-97-8177-01A-11R-2287-07                        <NA>
## TCGA-49-6744-11A-01R-1858-07                        <NA>
##                              paper_Purity.ABSOLUTE.calls
## TCGA-73-4658-01A-01R-1755-07                        0,35
## TCGA-44-2661-11A-01R-1758-07                        <NA>
## TCGA-55-6986-11A-01R-1949-07                        <NA>
## TCGA-55-8615-01A-11R-2403-07                        <NA>
## TCGA-97-8177-01A-11R-2287-07                        <NA>
## TCGA-49-6744-11A-01R-1858-07                        <NA>
table(sample_info$sample_type)  # Summarize sample types (Tumor vs. Normal)
## 
##       Primary Tumor Solid Tissue Normal 
##                 539                  59
# Goal of this section: build two tables, sample_info_tumor and
# sample_info_normal, that contain only patients having BOTH a tumor and a
# normal sample, with a single tumor sample kept per patient.

# Extract just the normal sample info
sample_info_normal <- sample_info[sample_info$definition == "Solid Tissue Normal", ]

# Look for tumor samples with normal matches from same patients
sample_info_tumor <- sample_info %>%
  filter(
    patient %in% sample_info_normal$patient,
    definition == "Primary solid Tumor"
  )

# The tumor list is longer -- check out duplicate patient IDs in this list
sample_info_tumor_dups <- sample_info_tumor %>%
  group_by(patient) %>%
  filter(n() > 1) %>%
  ungroup()

unique(sample_info_tumor_dups$patient) # There are 6 patients with multiple tumor samples
## [1] "TCGA-44-6147" "TCGA-44-2662" "TCGA-44-5645" "TCGA-44-6146" "TCGA-44-2668"
## [6] "TCGA-44-2665"
sample_info_tumor_dups_FFPE <- sample_info_tumor_dups[sample_info_tumor_dups$is_ffpe, ] # OK the difference is the FFPE status.
# It seems these are the only 6 patients in the group who have FFPE samples available.

# Decision (provisional): for these 6 patients keep the FFPE tumor sample and
# drop the non-FFPE one(s). Not sure if that's the right choice, revisit later.

# Get the non-FFPE duplicate patient sample info
sample_info_tumor_dups_non_FFPE <- sample_info_tumor_dups[!sample_info_tumor_dups$is_ffpe, ]
# Remove these IDs from the main tumor sample info
sample_info_tumor <- sample_info_tumor %>%
  filter(!barcode %in% sample_info_tumor_dups_non_FFPE$barcode)

# Drop every normal sample whose patient has no tumor sample left in
# sample_info_tumor. When this was written that was exactly one patient
# ("TCGA-44-6144"); deriving it from the data instead of hard-coding the ID
# keeps the two tables matched if the cohort or the filtering above changes.
sample_info_normal <- sample_info_normal %>%
  filter(patient %in% sample_info_tumor$patient)

# Build the matched tumor-normal sample table: stack tumor rows on top of the
# normal rows, drop the list-valued `treatments` column (it carries no usable
# information), then order rows so that each patient's tumor sample
# (sample_type_id "01") is directly followed by its normal sample ("11").
sample_info_matched_T_NM <- rbind(sample_info_tumor, sample_info_normal) %>%
  dplyr::select(-treatments) %>%
  arrange(patient, sample_type_id) # patient first, tumor before normal within a patient

## Modifying the counts table for tumor-normal matched data ##

# Subset the counts table to the barcodes present in the matched T-NM sample
# table, in the same (tumor, normal, tumor, normal, ...) order
sample_barcodes <- as.character(sample_info_matched_T_NM$barcode)
counts_matched_T_NM <- counts[, sample_barcodes, drop = FALSE]

# Label the columns with the shorter sample label rather than the full barcode
colnames(counts_matched_T_NM) <- sample_info_matched_T_NM$sample

2.2.1 Quality control checks

library(dplyr)
library(edgeR)
## Warning: package 'edgeR' was built under R version 4.3.2
# --- Distribution of all counts in the matched table ---
hist(as.matrix(counts_matched_T_NM)) # extremely right-skewed

# Same on the log2 scale. Note: zero counts become -Inf, and hist() only bins
# finite values, so zeros are not shown in any of the log2 histograms below.
hist(log2(as.matrix(counts_matched_T_NM))) # Still not normal at all

# --- Tumor samples only (odd-numbered columns) ---
counts_matched_T <- counts_matched_T_NM[seq(1, ncol(counts_matched_T_NM), by = 2)]
hist(log2(as.matrix(counts_matched_T))) # Equally bad distribution, why is it the same though??

# --- Normal samples only (even-numbered columns) ---
counts_matched_NM <- counts_matched_T_NM[seq(2, ncol(counts_matched_T_NM), by = 2)]
hist(log2(as.matrix(counts_matched_NM))) # Equally bad distribution, why is it the same though????

# Per-sample boxplots of the raw counts: very hard to read
boxplot(counts_matched_T_NM)

# boxplot(counts_matched_T) # Boxplots for just tumors looks crazy
# boxplot(counts_matched_NM) # Boxplots for just normals looks crazy

## PCA to check for tumor-normal separation
# One colour per group: 1 = tumor (odd columns), 2 = normal (even columns)
colz <- rep(c(1, 2), length(counts_matched_T_NM) / 2)
plotMDS(
  counts_matched_T_NM,
  col = colz,
  pch = 1,
  gene.selection = "common",
  main = "PCA for TCGA-LUAD expression"
)

# Separate but not very good separation, 1 definite outlier.
# Re-draw without `pch` so that points are drawn as sample names, which makes
# the outlier identifiable.
plotMDS(
  counts_matched_T_NM,
  col = colz,
  gene.selection = "common",
  main = "PCA for TCGA-LUAD expression"
)

# Checking out this outlier, TCGA-38-4626-01A
hist(log2(counts_matched_T_NM$`TCGA-38-4626-01A`)) # Not obvious why it's an outlier, but must somehow be really normal-like?

## Dendrogram to see if the same outliers are found
# dist() works on rows, so transpose to get sample-to-sample distances
sample_dist <- dist(t(counts_matched_T_NM))
hc <- hclust(sample_dist) # hierarchical clustering of the samples
plot(hc, cex = 0.8, main = "Dendrogram of Samples", sub = "", xlab = "")

2.2.2 Acting on quality control checks

# Drop the single most obvious outlier together with its matched partner so
# that the table stays strictly paired:
# TCGA-38-4626-01A (tumor) and TCGA-38-4626-11A (normal)
luad_outlier_pair <- c("TCGA-38-4626-01A", "TCGA-38-4626-11A")
counts_matched_T_NM <- counts_matched_T_NM %>%
  dplyr::select(-all_of(luad_outlier_pair))

# counts_matched_T_NM <- counts_matched_T_NM %>% dplyr::select(-c("TCGA.38.4626.01A","TCGA.38.4626.11A"))
# Version after reading it in

## PCA to check for tumor-normal separation with outlier removed
# Colours: 1 = tumor (odd columns), 2 = normal (even columns)
colz2 <- rep(c(1, 2), length(counts_matched_T_NM) / 2)
plotMDS(
  counts_matched_T_NM,
  col = colz2,
  pch = 1,
  gene.selection = "common",
  main = "PCA for TCGA-LUAD expression after outlier removal"
)

## Saving this version of the T-NM matched counts
#write.table(counts_matched_T_NM, "../2_Outputs/3_Tumor_expression/TCGA_LUAD_counts_matched_T_NM_20241125.txt")

The matrices have messy boxplots and histograms, but since I am using the signed-rank test, which does not require normally distributed data, I have decided to go with this raw counts matrix for now.

2.3.1 Differential expression analysis using signed-rank test, and filter to FDR < 0.05

DGE/DEG analysis is based on a recent paper: Li et al. Genome Biology (2022) 23:79 Source code: https://github.com/xihuimeijing/DEGs_Analysis_FDR/blob/main/scripts/DEGs.R Accessed 2023/08/26

Tutorial: https://rpubs.com/LiYumei/806213 Accessed 2023/08/31

Unlike the tutorial, here I perform a signed-rank test rather than a rank-sum test, as the samples are not independent (they are matched tumor and normal samples).

library(edgeR)

# Make DGElist
# The paired test below relies on the columns alternating
# tumor, normal, tumor, normal, ... with each adjacent pair coming from the
# same patient. Verify this up front: the first 12 characters of a TCGA sample
# label are the patient ID.
tnm_sample_names <- names(counts_matched_T_NM)
tnm_is_tumor_col <- seq_along(tnm_sample_names) %% 2 == 1
stopifnot(
  "expected an even number of columns (tumor-normal pairs)" =
    length(tnm_sample_names) %% 2 == 0,
  "tumor and normal columns are not paired by patient" =
    identical(
      substr(tnm_sample_names[tnm_is_tumor_col], 1, 12),
      substr(tnm_sample_names[!tnm_is_tumor_col], 1, 12)
    )
)

readCount <- counts_matched_T_NM
# Group 1 = tumor (odd columns), group 2 = normal (even columns)
conditions <- factor(rep(c(1, 2), length(counts_matched_T_NM) / 2))
y <- DGEList(counts = readCount, group = conditions)

# Filter out genes with very low counts using the filterByExpr function (default)
keep <- filterByExpr(y)
y <- y[keep, , keep.lib.sizes = FALSE]

# Perform TMM normalization and transfer to CPM (Counts Per Million)
y <- calcNormFactors(y, method = "TMM")
count_norm_matrix <- cpm(y)
count_norm <- as.data.frame(count_norm_matrix)

# Unlike the tutorial, this is a paired (signed-rank) test, not a rank-sum test.
# The tumor and normal vectors are passed explicitly to the default method:
# this is the same test the formula interface ran with `paired = TRUE`
# (group 1 as x, group 2 as y, paired by position), but newer R releases
# reject `paired` in the formula method. Working row-wise on the matrix also
# avoids transposing a one-row data frame for every gene.
tnm_tumor_cpm <- count_norm_matrix[, tnm_is_tumor_col, drop = FALSE]
tnm_normal_cpm <- count_norm_matrix[, !tnm_is_tumor_col, drop = FALSE]
pvalues <- vapply(
  seq_len(nrow(count_norm_matrix)),
  function(i) {
    wilcox.test(
      x = tnm_tumor_cpm[i, ],
      y = tnm_normal_cpm[i, ],
      paired = TRUE
    )$p.value
  },
  numeric(1)
)
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with zeroes
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with zeroes
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with zeroes
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with zeroes
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with zeroes
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with zeroes
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with zeroes
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with zeroes
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with zeroes
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with zeroes
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with zeroes
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with zeroes
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with zeroes
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with zeroes
# False-discovery-rate correction of the per-gene p-values
fdr <- p.adjust(pvalues, method = "fdr")

# Log2 fold change: ratio of the mean normalized expression in tumor samples
# (odd columns) to the mean in normal samples (even columns)
tumor_indexes <- seq(1, ncol(count_norm), by = 2)
normal_indexes <- seq(2, ncol(count_norm), by = 2)

tumor_values <- count_norm[, tumor_indexes]
normal_values <- count_norm[, normal_indexes]
foldChanges <- log2(
  rowMeans(tumor_values, na.rm = TRUE) / rowMeans(normal_values, na.rm = TRUE)
)

# Collect everything in one table keyed by gene (row names of count_norm),
# then drop genes for which any of the three statistics is NA
DGE_LUAD_T_NM_signed_rank <- data.frame(
  log2foldChange = foldChanges,
  pValues = pvalues,
  FDR = fdr,
  row.names = rownames(count_norm)
)
DGE_LUAD_T_NM_signed_rank <- na.omit(DGE_LUAD_T_NM_signed_rank)


### Replace ensembl IDs with gene names
# Keep only the annotation rows for the ensembl IDs present in the results,
# sorted by ensembl ID.
gene_info_DEGs <- gene_info %>%
  filter(gene_id %in% rownames(DGE_LUAD_T_NM_signed_rank)) %>%
  arrange(., gene_id)

# Carry the ensembl ID as a real column so it survives sorting, then sort the
# results by it (same row order as before).
DGE_LUAD_T_NM_signed_rank$ensembl_id <- rownames(DGE_LUAD_T_NM_signed_rank)
DGE_LUAD_T_NM_signed_rank <- DGE_LUAD_T_NM_signed_rank %>%
  arrange(., ensembl_id)

# Look each gene name up by ID instead of assuming the two sorted tables line
# up row-for-row: a missing or duplicated ID in gene_info would otherwise shift
# every following name.
gene_name_idx <- match(
  DGE_LUAD_T_NM_signed_rank$ensembl_id,
  gene_info_DEGs$gene_id
)
if (anyNA(gene_name_idx)) {
  warning(
    sum(is.na(gene_name_idx)),
    " ensembl IDs have no entry in gene_info; their Gene is set to NA",
    call. = FALSE
  )
}
DGE_LUAD_T_NM_signed_rank$Gene <- gene_info_DEGs$gene_name[gene_name_idx]

# Remove the helper column and the row names; Gene is now the identifier.
DGE_LUAD_T_NM_signed_rank$ensembl_id <- NULL
rownames(DGE_LUAD_T_NM_signed_rank) <- NULL


### Filter to genes below FDR < 0.05 ###
fdrThres=0.05
DGE_LUAD_T_NM_signed_rank_sig <- DGE_LUAD_T_NM_signed_rank[DGE_LUAD_T_NM_signed_rank$FDR<fdrThres,]

nrow(DGE_LUAD_T_NM_signed_rank_sig) # 13465 (2024/11/07)
## [1] 13465

2.3.2 Visualization of DEGs (volcano plot)

log2FC_cutoff2 <- 1

# Caption text: number of FDR-significant genes whose |log2FC| exceeds the
# cutoff.
n_sig_above_cutoff2 <- nrow(
  DGE_LUAD_T_NM_signed_rank_sig[
    abs(DGE_LUAD_T_NM_signed_rank_sig$log2foldChange) > log2FC_cutoff2,
  ]
)
caption_v2 <- paste0(
  "Total = ", n_sig_above_cutoff2, " significant DEGs above log2FC cutoff"
)

# Volcano plot of all tested genes; the y axis uses the FDR column.
v2 <- EnhancedVolcano::EnhancedVolcano(
  toptable = DGE_LUAD_T_NM_signed_rank,
  lab = DGE_LUAD_T_NM_signed_rank$Gene,
  x = "log2foldChange",
  y = "FDR",
  # Thresholds
  pCutoff = 0.05,
  FCcutoff = log2FC_cutoff2,
  # Text
  title = "TE DEGs",
  subtitle = paste0("log2FC cutoff: ", log2FC_cutoff2),
  caption = caption_v2,
  xlab = "log2FC",
  ylab = "-log10(FDR)",
  # Appearance
  col = c("grey30", "mediumpurple2", "royalblue", "orange2"),
  legendPosition = "bottom",
  labSize = 3,
  max.overlaps = 10,
  drawConnectors = TRUE,
  arrowheads = FALSE,
  gridlines.minor = FALSE,
  gridlines.major = FALSE,
  ylim = c(0, 10)
)

v2
## Warning: ggrepel: 4772 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

2.3.3 Differential expression analysis using DESeq2, and filtering to FDR < 0.05

I have been told that the Wilcoxon signed-rank test may be much more suitable for assessing a handful of genes rather than whole-transcriptome analysis. DESeq2 is more typically used for the latter, despite the finding of the publication listed above. I will compare the results using DESeq2.

library(DESeq2)
## Warning: package 'DESeq2' was built under R version 4.3.3
library(apeglm)

readCount <- as.matrix(counts_matched_T_NM)

# Remove the outlier tumor/normal pair from the sample info and use the sample
# names as row names.
# `%in%` is required here: `sample != c(a, b)` recycles the two-element vector
# along the rows (odd rows compared with a, even rows with b), so it only drops
# both samples when they happen to sit at matching odd/even positions.
outlier_samples <- c("TCGA-38-4626-01A", "TCGA-38-4626-11A")
sample_info_matched_T_NM <- sample_info_matched_T_NM %>%
  dplyr::filter(., !(sample %in% outlier_samples))
rownames(sample_info_matched_T_NM) <- sample_info_matched_T_NM$sample

# Checking the sample names are in the same order
all(colnames(readCount)==rownames(sample_info_matched_T_NM))
## [1] TRUE
# Preparing and performing DESeq
# Build the DESeq2 dataset from the count matrix and the per-sample annotation
# table. The design models expression as a function of `definition` only.
# NOTE(review): the samples look like matched tumor/normal pairs, but the design
# has no patient/pair term, so the model does not use the pairing -- confirm
# whether a paired design (e.g. ~ patient + definition) was intended.
dds <- DESeqDataSetFromMatrix(countData = readCount,
                              colData = sample_info_matched_T_NM,
                              design= ~ definition)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not a warning or an error]
# Keep genes with at least `min_count` reads in at least as many samples as the
# smallest group. The group size is derived from the data rather than
# hard-coded (it is 57 when the 114 samples split evenly into two groups), so
# the filter stays correct if samples are added or removed.
min_count <- 10
smallest_group_size <- min(table(as.character(dds$definition)))
keep <- rowSums(counts(dds) >= min_count) >= smallest_group_size
dds <- dds[keep, ]

# Set the reference level as the normal tissue, so fold changes are reported as
# tumor relative to normal.
dds$definition <- relevel(dds$definition, ref = "Solid Tissue Normal")

# Perform differential expression analysis
dds <- DESeq(dds)
## estimating size factors
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not a warning or an error]
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not a warning or an error]
## final dispersion estimates
## fitting model and testing
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not a warning or an error]
## -- replacing outliers and refitting for 754 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not a warning or an error]
resultsNames(dds) # lists the coefficients
## [1] "Intercept"                                            
## [2] "definition_Primary.solid.Tumor_vs_Solid.Tissue.Normal"
# res <- results(dds, 
#                name="definition_Primary.solid.Tumor_vs_Solid.Tissue.Normal"
#                )

# or to shrink log fold changes association with condition:
res <- lfcShrink(dds, 
                coef="definition_Primary.solid.Tumor_vs_Solid.Tissue.Normal", 
                type="apeglm")
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
##     Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
##     sequence count data: removing the noise and preserving large differences.
##     Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
summary(res)
## 
## out of 19062 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 9099, 48%
## LFC < 0 (down)     : 5225, 27%
## outliers [1]       : 0, 0%
## low counts [2]     : 0, 0%
## (mean count < 10)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
# Collect the shrunken log2 fold changes, adjusted p-values and base means in a
# plain data frame. Row names are set explicitly from `res` instead of relying
# on cbind() picking up names from whichever vector happens to carry them.
res_table <- data.frame(
  log2FC = res$log2FoldChange,
  FDR = res$padj,
  base_mean = res$baseMean,
  row.names = rownames(res)
)

### Replace ensembl IDs with gene names
# Sort by ensembl ID (C-locale ordering).
res_table <- res_table[
  order(rownames(res_table), method = "radix"), ,
  drop = FALSE
]
gene_info_sorted <- gene_info %>%
  arrange(., gene_id) %>%
  filter(gene_id %in% rownames(res_table))

# Look each gene name up by ID rather than by row position: a missing or
# duplicated ID in gene_info would otherwise shift every following name.
gene_name_idx <- match(rownames(res_table), gene_info_sorted$gene_id)
if (anyNA(gene_name_idx)) {
  warning(
    sum(is.na(gene_name_idx)),
    " ensembl IDs have no entry in gene_info; their gene is set to NA",
    call. = FALSE
  )
}
res_table$gene <- gene_info_sorted$gene_name[gene_name_idx]

2.3.4 Filtering the results table [[work in progress]]

plotMA(res, 
       ylim = c(-10, 15),  # Adjust y-axis limits for clarity
       alpha = 0.05)    # Highlights significant genes (default: FDR ≤ 0.1)

FDR_min <- 0.05

# Rows with a known FDR at or below the threshold; a missing FDR never counts
# as significant.
is_sig <- !is.na(res_table$FDR) & res_table$FDR <= FDR_min
res_table_sig <- res_table[is_sig, ]

# Drop rows that still have a missing value in any other column.
res_table_sig <- res_table_sig[complete.cases(res_table_sig), ]

# Filtering by log2FC values
nrow(res_table_sig)
## [1] 13493
hist(res_table_sig$log2FC, breaks = 100) # Looks pretty normally distributed, and cutoff of 1 would remove a lot

nrow(res_table_sig[abs(res_table_sig$log2FC)>1,]) # log2FC cutoff of 1 would be pretty good
## [1] 4542
res_table_sig_cutoff1 <- res_table_sig[abs(res_table_sig$log2FC)>1,]
nrow(res_table_sig_cutoff1) # 4542 is a solid number
## [1] 4542
DGE_LUAD_T_NM_DESeq2_sig <- res_table_sig
DGE_LUAD_T_NM_DESeq2_sig_cutoff1 <- res_table_sig_cutoff1

2.3.5 Visualization of DEGs from the DESeq2 method (volcano plot)

log2FC_cutoff <- 1
FDR_cutoff <- 0.05

# Caption text: number of FDR-significant DESeq2 genes whose |log2FC| exceeds
# the cutoff.
n_sig_above_cutoff <- nrow(
  res_table_sig[abs(res_table_sig$log2FC) > log2FC_cutoff, ]
)
caption_v3 <- paste0(
  "Total = ", n_sig_above_cutoff, " significant DEGs above log2FC cutoff"
)

# Volcano plot of all genes in the DESeq2 results table; the y axis uses the
# FDR column.
v3 <- EnhancedVolcano::EnhancedVolcano(
  toptable = res_table,
  lab = res_table$gene,
  x = "log2FC",
  y = "FDR",
  # Thresholds
  pCutoff = FDR_cutoff,
  FCcutoff = log2FC_cutoff,
  # Text
  title = "TE DEGs",
  subtitle = paste0("log2FC cutoff: ", log2FC_cutoff),
  caption = caption_v3,
  xlab = "log2FC",
  ylab = "-log10(FDR)",
  # Appearance
  col = c("grey30", "mediumpurple2", "royalblue", "orange2"),
  legendPosition = "bottom",
  labSize = 3,
  max.overlaps = 10,
  drawConnectors = TRUE,
  arrowheads = FALSE,
  gridlines.minor = FALSE,
  gridlines.major = FALSE,
  ylim = c(0, 10)
)

v3
## Warning: ggrepel: 1308 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## 2.4 Checking the outputs of the two methods

library(ggvenn)
## Loading required package: scales
# Reload the saved significant signed-rank results from disk (this replaces the
# in-memory table of the same name).
DGE_LUAD_T_NM_signed_rank_sig <- read.table("../2_Outputs/4_Tumor_DEGs/DGE_LUAD_T_NM_signed_rank_sig_20241107.txt")

# Gene symbols called significant by each method
DESeq2_list <- DGE_LUAD_T_NM_DESeq2_sig$gene
signed_rank_list <- DGE_LUAD_T_NM_signed_rank_sig$Gene

# Named list: the names become the set labels in the diagram
gene_lists <- list(
  DESeq2_list = DESeq2_list,
  signed_rank_list = signed_rank_list
)

# Two-set Venn diagram of the overlap between the methods
ggvenn::ggvenn(gene_lists, fill_color = c("blue", "red"))

## ~80% agreement between the lists is really good. Since there is better justification for using DESeq2, I should probably use that one.

2.5 Saving outputs

# Change date suffix as appropriate if modifications are made
#write.table(DGE_LUAD_T_NM_signed_rank_sig, "../2_Outputs/DGE_LUAD_T_NM_signed_rank_sig_20241107.txt", sep = '\t')

# write.table(res_table, "../2_Outputs/4_Tumor_DEGs/DGE_LUAD_T_NM_DSeq2_20241127.txt", sep = '\t')
# write.table(res_table_sig_cutoff1, "../2_Outputs/4_Tumor_DEGs/DGE_LUAD_T_NM_DSeq2_sig_cutoff1_20241127.txt", sep = '\t')
# write.table(res_table_sig, "../2_Outputs/4_Tumor_DEGs/DGE_LUAD_T_NM_DSeq2_sig_20241127.txt", sep = '\t')

3. Differential methylation analysis of TCGA-LUAD tumor vs normal tissue (T-E)

I downloaded this level 3 methylation 450k data from cBioPortal, from TCGA Lung Adenocarcinoma (Firehose Legacy), https://www.cbioportal.org/study/summary?id=luad_tcga (accessed 2023/08/29). Note that this provides gene-level information but not probe-level information. I did a lot of work trying to do the analysis starting from probe-level information, but ultimately decided to stick with this.

3.1 Loading dataset

data_methylation_hm450_tumor <- read.table("../../Former_Smokers_Aim_2/1_TCGA_LUAD_multiomics/0_Unpaired_input_tables/data_methylation_hm450.txt", header=TRUE, fill=TRUE)

data_methylation_hm450_normal <- read.table("../../Former_Smokers_Aim_2/1_TCGA_LUAD_multiomics/0_Unpaired_input_tables/data_methylation_hm450_normals.txt", header=TRUE, fill=TRUE)

3.2 Formatting dataset

3.2.1 Formatting counts in tumor-normal pairs

allIDs_tumor  <- colnames(data_methylation_hm450_tumor)
allIDs_normal <- colnames(data_methylation_hm450_normal)

# Expected tumor ID for every normal column: swap the trailing tissue code
# ".11" for the tumor code ".01". The pattern is anchored and the dot escaped;
# the unanchored regex ".11" would also rewrite any "<char>11" inside the ID.
# Columns without that suffix (the gene identifier columns) are unchanged.
IDs_tumor_with_matches <- sub("\\.11$", ".01", allIDs_normal)

# Tumor columns that have a matching normal, in the order of the normal table.
data_methylation_hm450_tumor_with_matches <- data_methylation_hm450_tumor %>%
  dplyr::select(any_of(IDs_tumor_with_matches))

# Normal columns that have a matching tumor. Derived from the data instead of
# hard-coding the unmatched samples (TCGA.44.2655.11, TCGA.44.2659.11 and
# TCGA.44.2662.11 have no tumor counterpart).
normal_has_tumor <- IDs_tumor_with_matches %in% allIDs_tumor
data_methylation_hm450_normal_with_matches <-
  data_methylation_hm450_normal[, normal_has_tumor, drop = FALSE]

# Fail loudly if the two tables are not aligned column-for-column, or if their
# gene columns differ, because cbind() below pairs them purely by position.
stopifnot(
  identical(
    colnames(data_methylation_hm450_tumor_with_matches),
    IDs_tumor_with_matches[normal_has_tumor]
  ),
  identical(
    as.character(data_methylation_hm450_tumor_with_matches[[1]]),
    as.character(data_methylation_hm450_normal_with_matches[[1]])
  )
)

# Make a combined table of matched tumor-normal samples, interleaving the
# columns as tumor 1, normal 1, tumor 2, normal 2, ...
n_matched_cols <- ncol(data_methylation_hm450_tumor_with_matches)
data_methylation_hm450_tumor_normal_matched <- cbind(
  data_methylation_hm450_tumor_with_matches,
  data_methylation_hm450_normal_with_matches
)[order(c(seq_len(n_matched_cols), seq_len(n_matched_cols)))]

# Remove duplicate gene ID column and the entrez ID columns
data_methylation_hm450_tumor_normal_matched <- data_methylation_hm450_tumor_normal_matched[,-c(1,3,4)]

3.2.2 Giving suffixes to duplicate genes

# I want to make the gene names into row names, but I cannot because some gene
# names appear more than once. The repeated rows are not identical (their
# methylation values differ), so they are kept and renamed with a running
# index: _1, _2, ...
# The duplicated symbols are detected from the data instead of being listed by
# hand, so the step still works if the input file changes.
gene_symbols <- as.character(data_methylation_hm450_tumor_normal_matched[, 1])
dup_symbols <- unique(gene_symbols[duplicated(gene_symbols)])
is_dup <- gene_symbols %in% dup_symbols

# Occurrence number of each row within its gene symbol (1 for the first row
# carrying that symbol, 2 for the second, ...).
occurrence <- ave(seq_along(gene_symbols), gene_symbols, FUN = seq_along)
gene_symbols[is_dup] <- paste(
  gene_symbols[is_dup], occurrence[is_dup],
  sep = "_"
)

# Two-column lookup (original row name, suffixed gene name) of the renamed
# rows, sorted by gene name, kept for inspection.
checking_dups <- cbind(
  rownames(data_methylation_hm450_tumor_normal_matched)[is_dup],
  gene_symbols[is_dup]
)
checking_dups <- checking_dups[order(checking_dups[, 2]), , drop = FALSE]

# Replace the gene names in the T-NM matched file with the suffixed gene names
data_methylation_hm450_tumor_normal_matched[, 1] <- gene_symbols

# Now that there are no longer duplicates, make the gene names column into the
# row names and remove the gene names column (whatever the number of samples).
rownames(data_methylation_hm450_tumor_normal_matched) <- data_methylation_hm450_tumor_normal_matched[, 1]
data_methylation_hm450_tumor_normal_matched <-
  data_methylation_hm450_tumor_normal_matched[, -1, drop = FALSE]

3.3 Quality control checks

hist(as.matrix(data_methylation_hm450_tumor[3:length(data_methylation_hm450_tumor)]))

max(data_methylation_hm450_tumor[3:length(data_methylation_hm450_tumor)])
## [1] NA
min(data_methylation_hm450_tumor[3:length(data_methylation_hm450_tumor)])
## [1] NA
boxplot(data_methylation_hm450_tumor[3:length(data_methylation_hm450_tumor)])

hist(as.matrix(data_methylation_hm450_normal[3:length(data_methylation_hm450_normal)]))

boxplot(data_methylation_hm450_normal[3:length(data_methylation_hm450_normal)])

hist(as.matrix(data_methylation_hm450_tumor_normal_matched))

boxplot(data_methylation_hm450_tumor_normal_matched)

# This is definitely not a normal distribution, but the wilcoxon signed-rank test does not assume a normal distribution. However, maybe this could indicate an issue with the original files? Edit: Beta values normally have a bimodal distribution so it's not really unusual

3.4 Preprocessing (Conversion to M values)

# Shorter name for convenience
methyl_beta <- data_methylation_hm450_tumor_normal_matched

# Convert to M values
# M = log2(beta / (1 - beta)), applied element-wise to the whole table.
# Missing beta values stay missing; a beta of exactly 0 or 1 gives -Inf or Inf.
methyl_M=log2(methyl_beta/(1-methyl_beta))

3.5 Differential analysis and filtering to FDR < 0.05

# Function to remove a tumor-normal pair if one of them has an NA value. Used in the subsequent Wilcox signed-rank test.

# Drop every tumor/normal column pair in which either column contains an NA.
#
# my_data: data frame or matrix whose columns are interleaved pairs
#          (columns 1-2 are the first pair, 3-4 the second, ...).
# Returns: `my_data` restricted to the complete pairs, in the original order
#          and always with the same class as the input (possibly zero columns).
# Errors:  if the number of columns is odd, since the last column would have
#          no partner.
remove_NA_pairs <- function(my_data) {
  n_cols <- ncol(my_data)
  if (n_cols %% 2 != 0) {
    stop(
      "remove_NA_pairs() needs an even number of columns, got ", n_cols,
      call. = FALSE
    )
  }

  # TRUE for every column holding at least one NA
  col_has_na <- colSums(is.na(my_data)) > 0

  # Index of the first (tumor) column of each pair; empty when n_cols is 0
  first_of_pair <- seq(1, by = 2, length.out = n_cols / 2)
  pair_complete <- !col_has_na[first_of_pair] & !col_has_na[first_of_pair + 1]

  # Interleave the kept tumor and normal indices back into column order
  kept_first <- first_of_pair[pair_complete]
  valid_columns <- as.vector(rbind(kept_first, kept_first + 1))

  # drop = FALSE keeps the result two-dimensional even for matrix input
  my_data[, valid_columns, drop = FALSE]
}


### Wilcoxon signed-rank test ###

#Run the Wilcoxon signed-rank test for each gene. 
#Paired=TRUE specifies signed-rank, na.action=na.fail specifies that an error message will be thrown if NAs are still remaining after the filtering step.

# One paired Wilcoxon signed-rank p-value per gene (row of methyl_M).
#
# Tumor and normal values are passed to wilcox.test() as two aligned vectors.
# Building a group label with rep(c(1, 2), <number of columns>) produced a
# label vector twice as long as the data, so data.frame() recycled the values
# and every pair entered the test twice (doubling the apparent sample size).
# The x/y interface is also the one that still accepts `paired = TRUE`;
# the formula method rejects it from R 4.4.0 onwards.
pvalues <- vapply(seq_len(nrow(methyl_M)), function(i) {
  # Remove values from tumor-normal pairs if either of them is NA
  M_values <- remove_NA_pairs(methyl_M[i, ])

  # Flatten the remaining one-row table: tumor 1, normal 1, tumor 2, ...
  paired_values <- as.numeric(t(M_values))
  n_pairs <- length(paired_values) / 2

  # No complete pair left for this gene: no test is possible
  if (n_pairs < 1) {
    return(NA_real_)
  }

  # Same guarantee as na.action = na.fail: stop if an NA slipped through
  stopifnot(!anyNA(paired_values))

  tumor <- paired_values[seq(1, by = 2, length.out = n_pairs)]
  normal <- paired_values[seq(2, by = 2, length.out = n_pairs)]

  wilcox.test(tumor, normal, paired = TRUE)$p.value
}, numeric(1))
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with ties
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with ties
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with ties
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with ties
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with ties
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with ties
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with ties
#Note that if you use don't use the remove_NA_pairs filtering step, and you use na.action=na.pass, you also don't get any error messages, and it's possible that this also effectively skips over pairs with NAs. I just couldn't find a detailed explanation of how na.pass worked in this case, so I filtered first using my own remove_NA_pairs function.

# Adjust the per-gene p-values for multiple testing ("fdr" method of p.adjust).
fdr <- p.adjust(pvalues, method = "fdr")

# Columns are interleaved: odd positions are taken as tumor, even positions as
# normal.
tumor_indexes <- seq(1, ncol(methyl_M), by = 2)
normal_indexes <- seq(2, ncol(methyl_M), by = 2)

# The fold change is computed on the beta values, not on the M values:
# log2 of (mean tumor beta / mean normal beta) for every gene.
tumor_values <- methyl_beta[, tumor_indexes]
normal_values <- methyl_beta[, normal_indexes]
foldChanges <- log2(
  rowMeans(tumor_values, na.rm = TRUE) / rowMeans(normal_values, na.rm = TRUE)
)

# One table with the gene name as an ordinary column and default row names;
# rows with any missing statistic are then dropped.
DMeth_LUAD_T_NM_hm450 <- data.frame(
  log2foldChange = foldChanges,
  pValues = pvalues,
  FDR = fdr
)
DMeth_LUAD_T_NM_hm450[["Gene"]] <- rownames(methyl_beta)
rownames(DMeth_LUAD_T_NM_hm450) <- NULL
DMeth_LUAD_T_NM_hm450 <- na.omit(DMeth_LUAD_T_NM_hm450)

# Resolve the genes that were given a numeric "_1" / "_2" suffix because their
# symbol appeared more than once: keep the row with the lowest FDR per symbol.
library(stringr)

# Original symbol of every row: strip a trailing "_<number>" suffix only.
base_symbol <- str_remove(DMeth_LUAD_T_NM_hm450$Gene, "_[0-9]+$")

# TRUE for the single most significant row of each symbol. which.min() returns
# the first minimum, so a tie on FDR keeps one row instead of discarding both
# copies of the gene, and a copy that was already removed as NA earlier does
# not break the pairing.
is_most_significant <- ave(
  DMeth_LUAD_T_NM_hm450$FDR,
  base_symbol,
  FUN = function(fdr_values) {
    as.numeric(seq_along(fdr_values) == which.min(fdr_values))
  }
) == 1

# The less significant duplicates, kept for inspection
DMeth_LUAD_T_NM_hm450_dups_to_rm <- DMeth_LUAD_T_NM_hm450[!is_most_significant, ]

# Remove the less significant duplicate genes
DMeth_LUAD_T_NM_hm450 <- DMeth_LUAD_T_NM_hm450[is_most_significant, ]

# Remove the "_n" suffixes from the remaining genes
DMeth_LUAD_T_NM_hm450$Gene <- base_symbol[is_most_significant]

# Keep the genes with FDR<0.05
fdrThres=0.05
DMeth_LUAD_T_NM_hm450_sig <- DMeth_LUAD_T_NM_hm450[DMeth_LUAD_T_NM_hm450$FDR<fdrThres,]

nrow(DMeth_LUAD_T_NM_hm450_sig) # 9868 (2024/11/08 PM)
## [1] 9868

3.6 Saving outputs

# Change date suffix as appropriate if modifications are made
write.table(DMeth_LUAD_T_NM_hm450_sig, "../2_Outputs/DMeth_LUAD_T_NM_hm450_sig_20241108_PM.txt", sep = '\t')

3.7 Visualizing DMGs as volcano plot

log2FC_cutoff3 <- 0.3

# Volcano plot of the differentially methylated genes (DMGs); the y axis uses
# the FDR column. The caption now says "DMGs" to match the title (it was copied
# from the expression plots and said "DEGs").
# NOTE(review): this reuses the name `v3`, which also holds the DESeq2 volcano
# plot created earlier in the script -- confirm that overwriting it is intended.
v3 <- EnhancedVolcano::EnhancedVolcano(
  toptable = DMeth_LUAD_T_NM_hm450,
  lab = DMeth_LUAD_T_NM_hm450$Gene,
  x = "log2foldChange",
  y = "FDR", 
 # pCutoffCol = 'min_smoothed_fdr',
  xlab = "log2FC",
  ylab = "-log10(FDR)",
  title = "TM DMGs",
  subtitle = paste0("log2FC cutoff: ", log2FC_cutoff3),
  # Number of FDR-significant genes whose |log2FC| exceeds the cutoff
  caption = paste0("Total = ", nrow(DMeth_LUAD_T_NM_hm450_sig[abs(DMeth_LUAD_T_NM_hm450_sig$log2foldChange)>log2FC_cutoff3,]), " significant DMGs above log2FC cutoff"),
  col = c("grey30", "mediumpurple2", "royalblue", "orange2"),
  legendPosition = "bottom",
  labSize = 3,
  max.overlaps = 10,
  drawConnectors = TRUE,
  arrowheads = FALSE,
  pCutoff = 0.05,
  FCcutoff = log2FC_cutoff3,
  gridlines.minor = FALSE,
  gridlines.major = FALSE,
  xlim = c(-2, 4),
  ylim = c(0,10)
)

v3
## Warning: ggrepel: 3947 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

3.8 Extra checks

4. Differential expression analysis of the reference “persistent” airway current vs former vs never smoker dataset (A2)

4.1 Loading dataset

# load series and platform data from GEO

gset <- getGEO("GSE7895", GSEMatrix =TRUE, AnnotGPL=TRUE)
## Found 1 file(s)
## GSE7895_series_matrix.txt.gz
# getGEO() returns a list; when it holds several entries, pick the GPL96 one.
idx <- if (length(gset) > 1) grep("GPL96", attr(gset, "names")) else 1
gset <- gset[[idx]]

# Make the feature annotation column names syntactically valid so they match
# the toptable output
fvarLabels(gset) <- make.names(fvarLabels(gset))

# One group code per sample, in sample order
gsms <- paste0("22222222222222222222200000000000000000000000000000",
               "00000000000000000000000111111111111111111111111111",
               "1111")
sml <- strsplit(gsms, split = "")[[1]]

# Drop probes with a missing value in any sample. Nothing below changes the
# expression matrix, so this single filter is sufficient.
gset <- gset[complete.cases(exprs(gset)), ]

# Name the groups: the sorted codes "0", "1", "2" become current, former and
# never smoker respectively.
groups <- make.names(c("current_smoker","former_smoker","never_smoker"))
gs <- factor(sml)
levels(gs) <- groups
gset$group <- gs

# Design matrix without intercept: one indicator column per group
design <- model.matrix(~group + 0, gset)
colnames(design) <- levels(gs)

4.2 Quality control checks

4.2.1 Initial checks (histogram, boxplot, PCA)

## Make histograms and boxplots to check if the data is log-transformed and needs quantile normalization ##

hist(as.matrix(exprs(gset))) # Values range 1-15, and 1 big peak around 3. 

boxplot(exprs(gset)) # Same range of values, with similar-looking ranges, but not exactly the same

# Narrow range, therefore no log2 normalization needed

exprs(gset) <- normalizeBetweenArrays(exprs(gset))
boxplot(exprs(gset))

# 2024/11/12: I elected to do quantile normalization because this gave me a larger list of "persistent" genes. Could justify that it "better captures the variation between groups" etc

min(exprs(gset))
## [1] -0.2140344
max(exprs(gset))
## [1] 14.59784
## Plot PCA ##
# Colour code per sample = integer code of its group level (1, 2, 3 in the
# order of levels(gs)).
colz <- as.numeric(as.factor(gs)) # Get color values from group
plotMDS(exprs(gset),
        gene.selection = "common",
        main = "PCA for GSE7895",
        col = colz,
        pch = 1
        #labels = gs
        )

# The legend colours must follow the level order, because the labels do.
# unique(colz) is in order of first appearance among the samples, which pairs
# the wrong colour with each label whenever the samples are not sorted by level.
legend("topright", legend = levels(as.factor(gs)), 
       fill = seq_along(levels(as.factor(gs))), 
       title = "Smoking status")

# No separation, all mixed up. This isn't a good look.

4.2.2 Investigating source of variation

Extract and format phenotypic data

library(stringr)

# Sample annotation table of the dataset
phenotypic_data <- pData(gset)

# Keep only the free-text characteristics column and the group column
columns_to_find <- c("characteristics_ch1.1", "group")
indexes <- unlist(lapply(
  columns_to_find,
  function(col_name) which(names(phenotypic_data) == col_name)
))
phenotypic_data <- phenotypic_data[, indexes]

# Pull the numeric fields out of the characteristics text. Each pattern takes
# the run of digits directly after the field label; samples without that label
# get NA.
characteristics <- phenotypic_data$characteristics_ch1.1
phenotypic_data$age <- as.numeric(
  str_extract(characteristics, "(?<=Age:)\\d+")
)
phenotypic_data$packyears <- as.numeric(
  str_extract(characteristics, "(?<=Packyears:)\\d+")
)
phenotypic_data$TSQ_months <- as.numeric(
  str_extract(characteristics, "(?<=Time Since Quit Smoking \\(months\\):)\\d+")
)

# The unparsed text (first column) is no longer needed
phenotypic_data <- phenotypic_data[, -1]

# Never smokers have no pack years and current smokers have not quit, so both
# get 0 instead of NA
is_never <- phenotypic_data$group == "never_smoker"
is_current <- phenotypic_data$group == "current_smoker"
phenotypic_data$packyears[is_never] <- 0
phenotypic_data$TSQ_months[is_current] <- 0

# 0/1 indicator factors for each smoking status, for the linear model
phenotypic_data[["former_smoking_status"]] <-
  as.factor(as.numeric(phenotypic_data$group == "former_smoker"))
phenotypic_data[["current_smoking_status"]] <-
  as.factor(as.numeric(phenotypic_data$group == "current_smoker"))
phenotypic_data[["never_smoking_status"]] <-
  as.factor(as.numeric(phenotypic_data$group == "never_smoker"))

4.2.3 Plot PCA using other phenotypic data

## Plot PCA using age to define color
# Create a gradient color palette (light blue to dark blue)
palette <- colorRampPalette(c("lightblue", "darkblue"))

## Plot PCA of age ##
colz_age <- palette(length(phenotypic_data$age))[rank(phenotypic_data$age)]  # Map ages to gradient colors
plotMDS(exprs(gset),
        gene.selection = "common",
        main = "PCA for GSE7895 (darker blue ~ higher age)",
        col = colz_age,
        pch = 1
        )
# Add a color bar for age
legend("topright", legend = range(phenotypic_data$age), 
       fill = palette(2), 
       title = "Age")

# Does not seem to be an age effect




### Plot PCA of packyears ###

# Excluding packyears of zero (never smokers)
pheno_packyears <- phenotypic_data[phenotypic_data$packyears!=0,]
exprs_packyears <- as.data.frame(exprs(gset)) %>%
  dplyr::select(rownames(pheno_packyears))

colz_packyears <-  palette(length(pheno_packyears$packyears))[rank(pheno_packyears$packyears)] # Map packyears to gradient colors
plotMDS(exprs_packyears,
        gene.selection = "common",
        main = "PCA for GSE7895 (darker blue ~ higher packyears)",
        col = colz_packyears,
        pch = 1
        #labels = gs
        )
# Add a color bar for packyears
legend("topright", legend = range(pheno_packyears$packyears), 
       fill = palette(2), 
       title = "Packyears")

## Does not seem to be packyears effect


### Plot PCA of time since quitting ###
# Only samples with a known time since quitting (current smokers were set to 0
# earlier; samples with NA are dropped).
pheno_tsq <- phenotypic_data[!is.na(phenotypic_data$TSQ_months),]
exprs_tsq <- as.data.frame(exprs(gset)) %>%
  dplyr::select(dplyr::all_of(rownames(pheno_tsq)))

# Map time since quitting to gradient colors. The column is spelled out in
# full: `$TSQ` only worked through partial matching of `TSQ_months`, which
# returns NULL as soon as the match is ambiguous or the table is a tibble.
colz_TSQ <- palette(length(pheno_tsq$TSQ_months))[rank(pheno_tsq$TSQ_months)]
plotMDS(exprs_tsq,
        gene.selection = "common",
        main = "PCA for GSE7895 (darker blue ~ more time since quitting)",
        col = colz_TSQ,
        pch = 1
        #labels = gs
        )
# Legend shows the two ends of the gradient (minimum and maximum TSQ)
legend("topright", legend = range(pheno_tsq$TSQ_months), 
       fill = palette(2), 
       title = "TSQ")

## Does not seem to be TSQ effect

This is potentially problematic, but I propose that if the genes determined to be “persistent” can differentiate between the groups as expected in PCA, it will be evidence that the results are valid despite the groups not being differentiated by all the genes taken as a whole.

Note that I began trying to do this analysis accounting for pack years and TSQ (see other script), but for now I am just looking at the smoking status comparisons alone.

4.3 Differential expression analysis

# Estimate the mean-variance trend and observation weights with vooma.
# `TRUE` is spelled out: `T` is an ordinary variable that can be reassigned.
v <- vooma(gset, design, plot = TRUE)

v$genes <- fData(gset) # attach gene annotations


# fit linear model
fit  <- lmFit(v)

# set up contrasts of interest and recalculate model coefficients
#cts <- c(paste(groups[1],"-",groups[2],sep=""), paste(groups[1],"-",groups[3],sep=""), paste(groups[2],"-",groups[3],sep=""))
#cont.matrix <- makeContrasts(contrasts=cts, levels=design)
# The three pairwise comparisons between the smoking-status groups
cont.matrix <- makeContrasts(
  CS_vs_NS = current_smoker - never_smoker,
  FS_vs_NS = former_smoker - never_smoker,
  CS_vs_FS = current_smoker - former_smoker,
  levels = design
)

fit2 <- contrasts.fit(fit, cont.matrix)


# compute statistics and table of top significant genes
fit2 <- eBayes(fit2, proportion = 0.01) # Proportion is "assumed proportion of genes which are differentially expressed"

4.4 Select “persistent” DEGs, and basic filter (keep lower FDR of duplicates, apply FDR < 0.05 cutoff)

library(dplyr)
library(VennDiagram)

## Separate out genes that are DEGS in CS vs NS and FS vs NS

## Note: I have decided not to filter out genes that are significantly different between CS and FS because I realized that doesn't make logical sense.

# Classify every probe in every contrast at FDR < 0.05 with no fold-change
# threshold. The filters below treat 0 as "not differentially expressed" and
# compare the remaining codes between contrasts.
dT <- decideTests(fit2, adjust.method="fdr", p.value=0.05, lfc=0)

# Venn diagram of results
vennDiagram(dT)

# Select the genes differentially expressed in both CS_vs_NS and FS_vs_NS
dT_persistent <- dT %>%
  as.data.frame(.) %>%
  filter(CS_vs_NS != 0) %>% # Differentially expressed in CS vs NS
  filter(CS_vs_NS == FS_vs_NS)# Same non-zero call (same direction) in FS vs NS
nrow(dT_persistent) # 128 genes indeed
## [1] 128
# Get the toptable format for all genes
# number=Inf returns every probe, not only the significant ones.
# NOTE(review): no `coef` is given and the table carries an `F` column, so
# P.Value / adj.P.Val look like the overall F-test across the three contrasts
# rather than a per-contrast FDR -- confirm this is the FDR meant to be
# filtered on and reported below.
tT <- topTable(fit2, adjust="fdr", sort.by="B", number=Inf) # Inf returns all probes

# Filter to the "persistent" genes
tT_persistent <- tT %>%
  filter(ID %in% rownames(dT_persistent))

# Filter out blanks, keep lower FDR of ties
tT_persistent <- tT_persistent %>%
  filter(Gene.symbol != "") %>% # Remove blank gene symbols
  filter(adj.P.Val <= 0.05) %>% # Remove FDR > 0.05 genes
  group_by(Gene.symbol) %>%
  slice_min(adj.P.Val, with_ties = TRUE) %>% 
  # For probesets mapping to same gene, keep one with lowest FDR. Keep ties for now to check later.
  ungroup()
nrow(tT_persistent)
## [1] 116
# Checking for ties
ties <- tT_persistent%>%
  group_by(Gene.symbol) %>%
  filter(n() > 1) %>%
  ungroup()
print(ties)
## # A tibble: 2 × 28
##   ID      Gene.title Gene.symbol Gene.ID UniGene.title UniGene.symbol UniGene.ID
##   <chr>   <chr>      <chr>       <chr>   <chr>         <chr>          <chr>     
## 1 214303… mucin 5AC… MUC5AC      4586    ""            ""             ""        
## 2 214385… mucin 5AC… MUC5AC      4586    ""            ""             ""        
## # ℹ 21 more variables: Nucleotide.Title <chr>, GI <int>,
## #   GenBank.Accession <chr>, Platform_CLONEID <lgl>, Platform_ORF <lgl>,
## #   Platform_SPOTID <chr>, Chromosome.location <chr>,
## #   Chromosome.annotation <chr>, GO.Function <chr>, GO.Process <chr>,
## #   GO.Component <chr>, GO.Function.ID <chr>, GO.Process.ID <chr>,
## #   GO.Component.ID <chr>, CS_vs_NS <dbl>, FS_vs_NS <dbl>, CS_vs_FS <dbl>,
## #   AveExpr <dbl>, F <dbl>, P.Value <dbl>, adj.P.Val <dbl>
# As there is a tie between the two MUC5AC probes, I will remove the MUC5AC probe with an "x" label (214303_x_at) for cross-reactivity
tT_persistent <- tT_persistent %>% filter (ID != "214303_x_at")

#Pick the columns we care about
# Keep the gene symbol, the three contrast columns and the FDR, and rename them
# with an _A2 suffix.
GSE7895_persistent_DEGs <- tT_persistent %>%
  dplyr::select(., Gene.symbol, CS_vs_NS, FS_vs_NS, CS_vs_FS, adj.P.Val) %>%
  dplyr::rename(., Gene = Gene.symbol, CS_NS_A2 = CS_vs_NS, FS_NS_A2 = FS_vs_NS, CS_FS_A2 = CS_vs_FS, FDR_A2 = adj.P.Val)

# Save output
# Written with write.table() defaults (space-separated, with row names).
write.table(GSE7895_persistent_DEGs, "../2_Outputs/1_Airway_DEGs/GSE7895_persistent_DEGs_20241127.txt")

4.5 Extra checks (PCA for stratification)

## Filter exprs to the "persistent" genes
exprs_persistent <- as.data.frame(exprs(gset)) %>%
  filter(rownames(.) %in% tT_persistent$ID)

## Plot PCA ##
# Colour each sample by its group. The integer code of a factor is the index
# of its level, so level i is always drawn in colour i.
colz <- as.numeric(as.factor(gs)) # Get color values from group
plotMDS(exprs_persistent,
        gene.selection = "common",
        main = "PCA for GSE7895 with persistent genes",
        col = colz,
        pch = 1
        #labels = gs
        )

# Legend colours are generated in level order (1, 2, ...) so that each label is
# paired with the colour actually used for that level. unique(colz) follows the
# order in which groups first appear among the samples, which need not match
# the order of levels() and could swap colours between labels.
legend("topright", legend = levels(as.factor(gs)),
       fill = seq_along(levels(as.factor(gs))),
       title = "Smoking status")

# You can see more separation happening, but I would expect to see current and former smokers more mixed together, whereas we see former and never smokers more mixed together. Hmm okay, interesting at least.

# Might be good to check on the age, packyears and TSQ here as well?


## Plot PCA of age ##
# One gradient colour per sample, picked by the rank of the sample's age.
# NOTE(review): `palette` is called with a count, so it is presumably a
# colour-ramp function defined earlier in the script rather than
# grDevices::palette() -- confirm.
colz_age <- palette(length(phenotypic_data$age))[rank(phenotypic_data$age)]
plotMDS(
  exprs_persistent,
  col = colz_age,
  pch = 16,
  gene.selection = "common",
  main = "PCA for GSE7895 (darker blue ~ higher age)"
)
# Two-swatch key for age: the two colours of palette(2), labelled with the
# minimum and maximum age
legend(
  "topright",
  title = "Age",
  legend = range(phenotypic_data$age),
  fill = palette(2)
)

# Does not seem to be an age effect

### Plot PCA of packyears ###

# Keep only the sample columns named by the rows of pheno_packyears
# (per the author's note, this excludes packyears of zero, i.e. never smokers)
exprs_persistent_packyears <- dplyr::select(
  as.data.frame(exprs_persistent),
  rownames(pheno_packyears)
)

# Map packyears to gradient colors via their rank
colz_packyears <- palette(length(pheno_packyears$packyears))[
  rank(pheno_packyears$packyears)
]
plotMDS(
  exprs_persistent_packyears,
  col = colz_packyears,
  pch = 16,
  # labels = gs,
  gene.selection = "common",
  main = "PCA for GSE7895 persistent genes (darker blue ~ higher packyears)"
)
# Two-swatch key for packyears, labelled with the minimum and maximum value
legend(
  "bottomleft",
  title = "Packyears",
  legend = range(pheno_packyears$packyears),
  fill = palette(2)
)

## Maybe some sort of packyears effect happening, not obviously so


### Plot PCA of time since quitting ###
# Keep only the sample columns named by the rows of pheno_tsq
exprs_persistent_tsq <- dplyr::select(
  as.data.frame(exprs_persistent),
  rownames(pheno_tsq)
)

# Map TSQ (time since quitting) to gradient colors via its rank
colz_TSQ <- palette(length(pheno_tsq$TSQ))[rank(pheno_tsq$TSQ)]
plotMDS(
  exprs_persistent_tsq,
  col = colz_TSQ,
  pch = 16,
  # labels = gs,
  gene.selection = "common",
  main = "PCA for GSE7895 persistent genes (darker blue ~ more months since quitting)"
)
# Two-swatch key for TSQ, labelled with the minimum and maximum value
legend(
  "bottomleft",
  title = "TSQ",
  legend = range(pheno_tsq$TSQ),
  fill = palette(2)
)

## Maybe some TSQ effect but not super obvious

5. Comparing and filtering A1, TE, TM, and A2 to generate “linked genes” and “linked persistent genes” lists

5.1.1 Reading in datasets to intersect

# A1: significant airway DEGs (GSE63127, current vs never smokers)
GSE63127_CS_NS_GEO2R_limma_sig <- read.table(
  file = "../2_Outputs/1_Airway_DEGs/GSE63127_CS_NS_GEO2R_limma_sig_20241115.txt",
  header = TRUE
)

# TM: significant TCGA-LUAD differentially methylated genes.
# No `header` is given here, so read.table() decides from the file itself
# (header is assumed only if the first row has one fewer field than the rest).
DMeth_LUAD_T_NM_hm450_sig <- read.table(
  file = "../2_Outputs/5_Tumor_DMGs/DMeth_LUAD_T_NM_hm450_sig_20241108_PM.txt"
)

5.1.2 Applying log2FC cutoffs

### Testing log2FC cutoffs ###
# For each dataset: set the absolute log2FC threshold, then count the rows
# whose |log2FC| exceeds it.

# A1 DEGs
log2Thres_A1 <- 0
nrow(
  GSE63127_CS_NS_GEO2R_limma_sig[
    abs(GSE63127_CS_NS_GEO2R_limma_sig$logFC) > log2Thres_A1,
  ]
)
## [1] 7105
# TCGA-LUAD DEGs
log2Thres_TE <- 0
nrow(
  DGE_LUAD_T_NM_signed_rank_sig[
    abs(DGE_LUAD_T_NM_signed_rank_sig$log2foldChange) > log2Thres_TE,
  ]
)
## [1] 13465
# TCGA-LUAD DMGs
log2Thres_TM <- 0
nrow(
  DMeth_LUAD_T_NM_hm450_sig[
    abs(DMeth_LUAD_T_NM_hm450_sig$log2foldChange) > log2Thres_TM,
  ]
)
## [1] 9868
## Notes on number of linked genes after filtering:
# 0, 0, 0 => 950 genes
# 0.2, 1, 0.3 => 143 genes
# 0.5, 2, 0.5 => 14 genes
# 0.5, 1, 0.5 => 26 genes (but still good correlation across board unlike with other options)


### Applying log2FC cutoffs and formatting consistently for merge ###
# Every table is reduced to three columns -- Gene, FDR_<set>, log2FC_<set> --
# after keeping only rows whose |log2FC| exceeds that dataset's threshold.

# A1: airway DEGs
GSE63127_CS_NS_GEO2R_limma_sig_cutoff <- GSE63127_CS_NS_GEO2R_limma_sig %>%
  filter(abs(logFC) > log2Thres_A1) %>%
  dplyr::select(Gene = Gene.symbol, FDR_A1 = adj.P.Val, log2FC_A1 = logFC)

# TE: tumor DEGs (signed-rank list)
DGE_LUAD_T_NM_signed_rank_sig_cutoff <- DGE_LUAD_T_NM_signed_rank_sig %>%
  filter(abs(log2foldChange) > log2Thres_TE) %>%
  dplyr::select(Gene, FDR_TE = FDR, log2FC_TE = log2foldChange)

# TM: tumor DMGs
DMeth_LUAD_T_NM_hm450_sig_cutoff <- DMeth_LUAD_T_NM_hm450_sig %>%
  filter(abs(log2foldChange) > log2Thres_TM) %>%
  dplyr::select(Gene, FDR_TM = FDR, log2FC_TM = log2foldChange)


## 2024/11/27 -- trying out the deseq2 T-E list for comparison
# Same TE threshold and output column names as the signed-rank list
DGE_LUAD_T_NM_DESeq2_sig_cutoff <- DGE_LUAD_T_NM_DESeq2_sig %>%
  filter(abs(log2FC) > log2Thres_TE) %>%
  dplyr::select(Gene = gene, FDR_TE = FDR, log2FC_TE = log2FC)

5.2 Filtering by dataset intersection

5.2.1 Intersection of A1 and TE DEGs

## Merge the lists
# A1_TE_merged_DEGs <- GSE63127_CS_NS_GEO2R_limma_sig_cutoff %>%
#   inner_join(., DGE_LUAD_T_NM_signed_rank_sig_cutoff, by = "Gene") %>%
#   filter(sign(log2FC_A1)==sign(log2FC_TE)) # Filter to genes with same signs
#   
# nrow(A1_TE_merged_DEGs)

#2024/11/27: Using the DESeq2 list instead
# Genes present in both A1 and TE whose log2FC has the same sign in both
A1_TE_merged_DEGs <- filter(
  inner_join(
    GSE63127_CS_NS_GEO2R_limma_sig_cutoff,
    DGE_LUAD_T_NM_DESeq2_sig_cutoff,
    by = "Gene"
  ),
  sign(log2FC_A1) == sign(log2FC_TE)
)

nrow(A1_TE_merged_DEGs)
## [1] 2264

5.2.2 Intersection of A1/TE DEGs with TM DMGs

## Merge the lists
# Genes from the A1/TE intersection that also appear in the TM list, keeping
# only those whose methylation log2FC has the opposite sign to the A1 log2FC
A1_TE_TM_linked_genes <- filter(
  inner_join(A1_TE_merged_DEGs, DMeth_LUAD_T_NM_hm450_sig_cutoff, by = "Gene"),
  sign(log2FC_A1) != sign(log2FC_TM)
)

nrow(A1_TE_TM_linked_genes)
## [1] 944

Note: A Spearman correlation filter could be incorporated here, but I think that would require pairing samples across the tumor expression and methylation datasets, at the expression/M-value level. The log2FC values give an idea of the overall correlation across genes, but applying a correlation filter more broadly needs the initial sample-level tables instead.

5.2.3 Visualizing Spearman correlations of log2FC values

## Visualizing Spearman correlations
library(GGally)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
# Pairwise view of the three log2FC columns: Spearman correlations in the
# upper panels, scatter plots in the lower panels, densities on the diagonal
ggpairs(
  data = A1_TE_TM_linked_genes[, c("log2FC_A1", "log2FC_TE", "log2FC_TM")],
  diag = list(continuous = "densityDiag", discrete = "barDiag", na = "naDiag"),
  lower = list(continuous = "points", combo = "facethist", discrete = "facetbar", na = "na"),
  upper = list(continuous = wrap("cor", method = "spearman"))
)

5.2.4 Visualizing multiple linear regression of the log2FC values

# Multiple linear regression: airway (A1) log2FC as the response, tumor
# expression (TE) and tumor methylation (TM) log2FC as the predictors
mlr_airway_model <- lm(
  formula = log2FC_A1 ~ log2FC_TE + log2FC_TM,
  data = A1_TE_TM_linked_genes
)
summary(mlr_airway_model)
## 
## Call:
## lm(formula = log2FC_A1 ~ log2FC_TE + log2FC_TM, data = A1_TE_TM_linked_genes)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.52245 -0.12802 -0.02276  0.09615  1.96406 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.048916   0.009493   5.153 3.12e-07 ***
## log2FC_TE    0.200719   0.008084  24.830  < 2e-16 ***
## log2FC_TM   -0.222290   0.021508 -10.335  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2818 on 941 degrees of freedom
## Multiple R-squared:  0.6143, Adjusted R-squared:  0.6135 
## F-statistic: 749.3 on 2 and 941 DF,  p-value: < 2.2e-16
library(scatterplot3d)
# 3D scatter of the linked genes with the fitted regression plane.
# The axes follow the model log2FC_A1 ~ log2FC_TE + log2FC_TM: plane3d() reads
# an lm object as z ~ x + y, so the response (A1) goes on z and the predictors
# go on x (TE) and y (TM), in formula order. This also makes the data agree
# with the xlab/ylab/zlab labels.
# scatterplot3d() has no `labels` argument (it only triggered
# '"labels" is not a graphical parameter' warnings), so none is passed.
s3d <- scatterplot3d(x = A1_TE_TM_linked_genes$log2FC_TE,
              y = A1_TE_TM_linked_genes$log2FC_TM,
              z = A1_TE_TM_linked_genes$log2FC_A1,
              main="Plotting airway expression vs. tumor expression vs. tumor methylation",
              xlab = "log2FC(TE)",
              ylab = "log2FC(TM)",
              zlab = "log2FC(A1)",
              pch = 19,
              #color = colors_3d, # getting color values from methylation sign
              color = "steelblue",
              type = "h", # vertical drop lines from each point
              #highlight.3d = TRUE,
              angle = 60
              )
s3d$plane3d(mlr_airway_model) # Add a plane based on the multiple linear regression model

5.3 Comparing and filtering A2 and “linked” genes to give “persistent linked genes”

5.3.1 Applying log2FC cutoffs

# For now, no cutoffs

5.3.2 Filtering by dataset intersection

# "Persistent linked genes": linked genes that are also in the A2 persistent
# list, keeping only those whose CS vs NS log2FC has the same sign in A1 and A2
A1_TE_TM_A2_persistent_linked_genes <- dplyr::filter(
  dplyr::inner_join(A1_TE_TM_linked_genes, GSE7895_persistent_DEGs, by = "Gene"),
  sign(log2FC_A1) == sign(CS_NS_A2)
)

A1_TE_TM_A2_persistent_linked_genes
##        Gene       FDR_A1  log2FC_A1       FDR_TE  log2FC_TE       FDR_TM
## 1     ALDOA 2.566855e-09  0.3770692 1.472728e-19  1.1579009 1.321686e-04
## 2      CA12 8.257875e-09  0.5580391 1.216350e-05  1.1408287 1.036252e-04
## 3    CCDC81 3.903929e-05 -0.3596527 4.544984e-10 -1.7577192 1.337249e-08
## 4   CEACAM5 3.601041e-21  2.4094841 1.052641e-26  4.2113900 2.014160e-04
## 5   CEACAM6 5.102659e-22  0.8945448 4.021570e-12  1.7217246 3.898486e-07
## 6   CTNNAL1 9.011552e-08 -0.2950499 3.863514e-17 -1.2043124 1.261212e-05
## 7     DEFB1 5.131697e-09  0.9223731 1.017661e-02  0.8351949 1.032043e-05
## 8    EFEMP1 1.212608e-05 -0.3657118 1.052726e-19 -1.4834837 6.903960e-08
## 9     EPAS1 2.834268e-12 -0.4954380 1.041468e-90 -2.7396724 1.496168e-05
## 10 FAM189A2 2.483296e-07 -0.2839072 1.861610e-72 -3.0991189 4.821474e-06
## 11     GMDS 4.611052e-10  0.6057314 1.519206e-28  1.4970196 3.393904e-08
## 12     GPX2 1.654570e-45  2.5506583 1.570672e-47  5.9339828 1.643197e-06
## 13  HTATIP2 3.945663e-29  0.9111148 9.266875e-07  0.5942826 2.582063e-07
## 14     LMO2 1.571661e-03 -0.3886250 5.096911e-49 -1.7873761 2.616719e-09
## 15   MBOAT7 2.914710e-12  0.5137116 2.262034e-05  0.4549056 1.643197e-06
## 16  N4BP2L1 1.356102e-06 -0.3125984 9.495642e-18 -1.1038533 4.206529e-08
## 17     NQO1 5.660885e-42  1.5172869 5.694570e-34  3.2010657 2.561175e-09
## 18    PTPRM 1.154749e-07 -0.3737652 2.245852e-18 -1.2072743 5.703237e-08
## 19     PYGB 1.305714e-02  0.3756975 6.681797e-03  0.3343992 3.084027e-09
## 20     RERE 7.996695e-04 -0.1771604 2.536276e-03 -0.2742240 2.812878e-05
## 21    S100P 3.017522e-10  0.6501092 4.170991e-40  4.8622959 1.705580e-06
## 22 SERPINB5 3.053668e-02  0.3601096 1.449405e-26  4.5919694 2.561175e-09
## 23   TALDO1 7.040113e-31  0.8134359 1.270123e-04  0.6705743 6.500921e-05
## 24     TLE1 1.734041e-12  0.5240967 1.366377e-03  0.3583536 1.049490e-03
## 25  TMPRSS4 2.289513e-07  0.4506248 2.938960e-68  4.8578293 1.358720e-07
## 26     TNS1 1.227499e-04 -0.2749736 1.376634e-64 -2.1624629 2.800355e-08
## 27      TXN 3.094024e-14  0.4572731 7.159367e-07  0.8241842 3.858088e-09
##      log2FC_TM   CS_NS_A2   FS_NS_A2     CS_FS_A2       FDR_A2
## 1  -0.16059240  0.4032802  0.3407331  0.062547093 2.776254e-03
## 2  -0.14627531  1.5430724  1.2437664  0.299306037 2.254072e-05
## 3   0.78081385 -1.2597645 -1.2047266 -0.055037898 3.951014e-03
## 4  -0.12273499  3.0351572  2.2437012  0.791456072 1.351057e-08
## 5  -0.23969715  0.9338987  0.6059897  0.327909015 5.320093e-06
## 6   0.08931844 -1.2184931 -1.0133587 -0.205134470 1.135857e-03
## 7  -0.18090111  1.9923476  1.5363189  0.456028674 3.839433e-05
## 8   0.51869617 -0.8270279 -0.9278730  0.100845102 7.527296e-03
## 9   0.27037980 -0.7916370 -0.7853296 -0.006307385 9.672029e-05
## 10  0.20654108 -0.2321193 -0.2270856 -0.005033630 1.191916e-02
## 11 -0.26437554  0.7874442  0.6092521  0.178192117 1.127664e-04
## 12 -0.33166918  2.9950716  1.5416876  1.453383958 8.082128e-17
## 13 -0.28846048  1.1208826  0.5714055  0.549477174 3.952368e-12
## 14  2.40759033 -0.6292862 -0.5040922 -0.125194010 1.146276e-03
## 15 -0.22613719  0.4638591  0.5691029 -0.105243813 1.670886e-02
## 16  0.62487237 -0.4475380 -0.4545522  0.007014150 1.620992e-03
## 17 -0.57757388  2.0600457  0.9921077  1.067938017 8.749026e-18
## 18  0.56915956 -0.2356382 -0.2915727  0.055934474 2.009266e-02
## 19 -0.27504588  0.2738300  0.3489422 -0.075112202 2.863405e-02
## 20  0.37479080 -0.3033106 -0.4886218  0.185311261 2.502948e-03
## 21 -0.29162868  0.8341189  0.6625807  0.171538228 7.117128e-05
## 22 -0.53839643  0.9229824  1.0296751 -0.106692664 2.566330e-02
## 23 -0.08999397  1.2640972  0.6498642  0.614233002 1.138555e-12
## 24 -0.15290994  0.6730828  0.5935444  0.079538406 2.353419e-04
## 25 -0.35746941  0.8303343  0.7097247  0.120609563 9.792030e-05
## 26  1.21544680 -0.7633155 -0.6635958 -0.099719752 1.752972e-04
## 27 -0.34520577  1.1010514  0.6418392  0.459212192 3.161799e-08
## Recording results based on cutoffs (when I was using the signed-rank test for TE - now I use DESeq2)
## 0.5,1,0.5,0 => 1 (NQO1)
## 0,0,0,0 => 27
## 0.2, 1, 0.3 => 10

5.3.3 Additional checks (correlation of log2FCs in A1 and A2)

# Spearman rank correlation between the CS vs NS log2FCs of A1 and A2 for the
# persistent linked genes. No `use` argument is passed: that option belongs to
# cor(), while cor.test() would silently absorb it through `...` without effect.
cor.test(A1_TE_TM_A2_persistent_linked_genes$log2FC_A1, y = A1_TE_TM_A2_persistent_linked_genes$CS_NS_A2,
    method = "spearman")
## 
##  Spearman's rank correlation rho
## 
## data:  A1_TE_TM_A2_persistent_linked_genes$log2FC_A1 and A1_TE_TM_A2_persistent_linked_genes$CS_NS_A2
## S = 292, p-value = 9.054e-07
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.9108669
# Scatter of A1 vs A2 CS vs NS log2FCs with a red linear trend line.
# Layers must be added to the ggplot() object with `+`; if they are passed as
# an extra argument inside ggplot(), they are never drawn and the plot is empty.
ggplot(A1_TE_TM_A2_persistent_linked_genes, aes(x = log2FC_A1, y = CS_NS_A2)) +
  geom_point(color = "blue", size = 3) +                # Scatter points
  geom_smooth(method = "lm", se = FALSE, color = "red") # Add a trend line

# Same plot with a black trend line and the minimal theme
ggplot(A1_TE_TM_A2_persistent_linked_genes, aes(x = log2FC_A1, y = CS_NS_A2)) +
  geom_point(color = "blue", size = 3) +                # Scatter points
  geom_smooth(method = "lm", se = FALSE, color = "black") + # Add a trend line
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

2024/11/27: Here is a very significant interaction network I got when running using the DESeq2 TE with no cutoffs: https://version-12-0.string-db.org/cgi/network?networkId=baZv6RvUQLCS

Saving files

# write.table(A1_TE_TM_linked_genes, "../2_Outputs/A1_TE_TM_linked_genes_nocutoffs_20241112.txt", sep = '\t')
#write.table(A1_TE_TM_A2_persistent_linked_genes, "../2_Outputs/A1_TE_TM_A2_persistent_linked_genes_DESeq2_nocutoffs_20241128.txt", sep = '\t')